diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9494,6 +9494,21 @@
       return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
     }
+    // (select c, 0, y) -> -!c & y
+    if (isNullConstant(TrueV)) {
+      SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
+                               ISD::getSetCCInverse(CCVal, VT));
+      SDValue Neg = DAG.getNegative(C, DL, VT);
+      return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+    }
+    // (select c, y, 0) -> -c & y
+    if (isNullConstant(FalseV)) {
+      SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+      SDValue Neg = DAG.getNegative(C, DL, VT);
+      return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+    }
+
     return SDValue();
   }
   case RISCVISD::BR_CC: {
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -58,13 +58,10 @@
 ;
 ; RV32I-LABEL: sltiu:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz a1, .LBB2_2
-; RV32I-NEXT:    # %bb.1:
-; RV32I-NEXT:    li a0, 0
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB2_2:
 ; RV32I-NEXT:    sltiu a0, a0, 3
+; RV32I-NEXT:    snez a1, a1
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
   %1 = icmp ult i64 %a, 3
@@ -215,15 +212,18 @@
 ; RV32I-NEXT:    bltz a3, .LBB11_2
 ; RV32I-NEXT:    # %bb.1:
 ; RV32I-NEXT:    sll a1, a0, a3
-; RV32I-NEXT:    li a0, 0
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    j .LBB11_3
 ; RV32I-NEXT:  .LBB11_2:
 ; RV32I-NEXT:    sll a1, a1, a2
-; RV32I-NEXT:    xori a3, a2, 31
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    srl a3, a4, a3
-; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    xori a4, a2, 31
+; RV32I-NEXT:    srli a5, a0, 1
+; RV32I-NEXT:    srl a4, a5, a4
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:  .LBB11_3:
 ; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    slti a2, a3, 0
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    and a0, a2, a0
 ; RV32I-NEXT:    ret
   %1 = shl i64 %a, %b
   ret i64 %1
@@ -300,15 +300,18 @@
 ; RV32I-NEXT:    bltz a3, .LBB15_2
 ; RV32I-NEXT:    # %bb.1:
 ; RV32I-NEXT:    srl a0, a1, a3
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    j .LBB15_3
 ; RV32I-NEXT:  .LBB15_2:
 ; RV32I-NEXT:    srl a0, a0, a2
-; RV32I-NEXT:    xori a3, a2, 31
-; RV32I-NEXT:    slli a4, a1, 1
-; RV32I-NEXT:    sll a3, a4, a3
-; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    xori a4, a2, 31
+; RV32I-NEXT:    slli a5, a1, 1
+; RV32I-NEXT:    sll a4, a5, a4
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:  .LBB15_3:
 ; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    slti a2, a3, 0
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    and a1, a2, a1
 ; RV32I-NEXT:    ret
   %1 = lshr i64 %a, %b
   ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -296,15 +296,13 @@
 define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind {
 ; RV32-LABEL: bittest_constant_by_var_shr_i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, a0, -32
-; RV32-NEXT:    bltz a1, .LBB12_2
-; RV32-NEXT:    # %bb.1:
-; RV32-NEXT:    andi a0, zero, 1
-; RV32-NEXT:    ret
-; RV32-NEXT:  .LBB12_2:
 ; RV32-NEXT:    lui a1, 301408
 ; RV32-NEXT:    addi a1, a1, 722
-; RV32-NEXT:    srl a0, a1, a0
+; RV32-NEXT:    srl a1, a1, a0
+; RV32-NEXT:    addi a0, a0, -32
+; RV32-NEXT:    slti a0, a0, 0
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
 ; RV32-NEXT:    ret
 ;
@@ -332,15 +330,13 @@
 define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind {
 ; RV32-LABEL: bittest_constant_by_var_shl_i64:
 ; RV32:       # %bb.0:
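; To illustrate the combine added in RISCVISelLowering.cpp above: when one arm of
; a select is the constant zero, the select can now be lowered as a
; setcc/negate/and mask instead of a branch, which is what the regenerated check
; lines in these tests reflect. A minimal sketch (hypothetical function name;
; register choices and whether neg or seqz/snez + addi -1 is emitted may differ
; from the actual output):
;
;   define i32 @select_y_or_zero(i32 %a, i32 %y) {
;     %c = icmp eq i32 %a, 0
;     %r = select i1 %c, i32 %y, i32 0   ; (select c, y, 0) -> -c & y
;     ret i32 %r
;   }
;
; would be expected to lower on RV32 to roughly:
;
;   seqz a0, a0        # c = (a == 0)
;   neg  a0, a0        # -c: all ones when c is true, zero otherwise
;   and  a0, a0, a1    # mask y with -c
;   ret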
-; RV32-NEXT: addi a1, a0, -32 -; RV32-NEXT: bltz a1, .LBB13_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: andi a0, zero, 1 -; RV32-NEXT: ret -; RV32-NEXT: .LBB13_2: ; RV32-NEXT: lui a1, 301408 ; RV32-NEXT: addi a1, a1, 722 -; RV32-NEXT: srl a0, a1, a0 +; RV32-NEXT: srl a1, a1, a0 +; RV32-NEXT: addi a0, a0, -32 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/compress-opt-select.ll b/llvm/test/CodeGen/RISCV/compress-opt-select.ll --- a/llvm/test/CodeGen/RISCV/compress-opt-select.ll +++ b/llvm/test/CodeGen/RISCV/compress-opt-select.ll @@ -42,7 +42,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_small_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 20 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -56,7 +56,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_small_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -20 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -70,7 +70,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_small_edge_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 31 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -84,7 +84,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_small_edge_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -32 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -99,7 +99,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_ledge_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 32 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -114,7 +114,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_ledge_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -33 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -129,7 +129,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 63 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -144,7 +144,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -63 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -159,7 +159,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_bedge_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 2047 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -174,7 +174,7 @@ ; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] define i32 @f_medium_bedge_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -2047 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -185,7 +185,7 @@ ; nothing to check. define i32 @f_big_ledge_pos(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, 2048 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } @@ -196,6 +196,6 @@ ; nothing to check. 
define i32 @f_big_ledge_neg(i32 %in0) minsize { %cmp = icmp CMPCOND i32 %in0, -2048 - %toRet = select i1 %cmp, i32 0, i32 42 + %toRet = select i1 %cmp, i32 -99, i32 42 ret i32 %toRet } diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -92,11 +92,11 @@ define i32 @fcvt_w_d_sat(double %a) nounwind { ; CHECKIFD-LABEL: fcvt_w_d_sat: ; CHECKIFD: # %bb.0: # %start -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB3_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz -; CHECKIFD-NEXT: .LBB3_2: # %start +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret ; ; RV32I-LABEL: fcvt_w_d_sat: @@ -108,7 +108,6 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 269824 @@ -116,44 +115,40 @@ ; RV32I-NEXT: lui a2, 1047552 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 794112 -; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: bltz s4, .LBB3_2 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: bltz s3, .LBB3_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: .LBB3_2: # %start -; RV32I-NEXT: bge s2, s3, .LBB3_4 +; RV32I-NEXT: blez s2, .LBB3_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: addi s5, a1, -1 +; RV32I-NEXT: addi s4, a1, -1 ; RV32I-NEXT: .LBB3_4: # %start ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bnez a0, .LBB3_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s5 -; RV32I-NEXT: .LBB3_6: # %start -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -192,12 +187,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB3_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: .LBB3_6: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -291,22 +283,22 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV32IFD-LABEL: fcvt_wu_d_sat: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB6_2 -; 
RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB6_2: # %start +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB6_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: slli a0, a0, 32 ; RV64IFD-NEXT: srli a0, a0, 32 -; RV64IFD-NEXT: .LBB6_2: # %start ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_d_sat: @@ -355,29 +347,29 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunsdfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: bltz s2, .LBB6_2 -; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: .LBB6_2: # %start ; RV64I-NEXT: li a0, 1055 ; RV64I-NEXT: slli a0, a0, 31 ; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 21 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: blez a0, .LBB6_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB6_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB6_3 +; RV64I-NEXT: .LBB6_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: .LBB6_4: # %start -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB6_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -575,18 +567,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB12_4: # %start ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB12_6 -; RV32IFD-NEXT: # %bb.5: # %start -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB12_7 -; RV32IFD-NEXT: .LBB12_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB12_7: # %start +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -595,99 +585,99 @@ ; ; RV64IFD-LABEL: fcvt_l_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB12_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB12_2: # %start +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_l_d_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte 
Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: lui a3, 802304 -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278016 -; RV32I-NEXT: addi s4, a0, -1 +; RV32I-NEXT: addi s3, a0, -1 ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s6, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: lui a3, 802304 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __gedf2@plt +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfdi@plt -; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: bltz s3, .LBB12_2 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s4, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: call __unorddf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: bgtz s7, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s6, s6, a0 +; RV32I-NEXT: slti a0, s5, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: and s2, s2, a0 ; RV32I-NEXT: .LBB12_2: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: mv s3, s0 -; RV32I-NEXT: bnez a0, .LBB12_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s3, s6 -; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lui a3, 802304 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: lui s6, 524288 -; RV32I-NEXT: bltz a0, .LBB12_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s6, s5 +; RV32I-NEXT: lui s5, 524288 +; RV32I-NEXT: bltz a0, .LBB12_4 +; RV32I-NEXT: # %bb.3: # %start +; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: .LBB12_4: # %start +; RV32I-NEXT: blez s3, .LBB12_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: addi s5, a1, -1 ; RV32I-NEXT: .LBB12_6: # %start -; RV32I-NEXT: bge s0, s4, .LBB12_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: addi s6, a1, -1 -; RV32I-NEXT: .LBB12_8: # %start -; RV32I-NEXT: mv 
a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bnez a0, .LBB12_10 -; RV32I-NEXT: # %bb.9: # %start -; RV32I-NEXT: mv s0, s6 -; RV32I-NEXT: .LBB12_10: # %start -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: call __unorddf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_l_d_sat: @@ -726,12 +716,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB12_7 -; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: .LBB12_7: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -791,7 +778,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI14_0)(a2) @@ -799,9 +787,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -810,11 +798,11 @@ ; ; RV64IFD-LABEL: fcvt_lu_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB14_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB14_2: # %start +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_lu_d_sat: @@ -1332,9 +1320,6 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32IFD-LABEL: fcvt_w_s_sat_i16: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB26_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: lui a0, %hi(.LCPI26_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI26_0)(a0) ; 
RV32IFD-NEXT: lui a0, %hi(.LCPI26_1) @@ -1342,14 +1327,14 @@ ; RV32IFD-NEXT: fmax.d ft0, fa0, ft0 ; RV32IFD-NEXT: fmin.d ft0, ft0, ft1 ; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz -; RV32IFD-NEXT: .LBB26_2: # %start +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_w_s_sat_i16: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB26_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: lui a0, %hi(.LCPI26_0) ; RV64IFD-NEXT: fld ft0, %lo(.LCPI26_0)(a0) ; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1) @@ -1357,7 +1342,10 @@ ; RV64IFD-NEXT: fmax.d ft0, fa0, ft0 ; RV64IFD-NEXT: fmin.d ft0, ft0, ft1 ; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz -; RV64IFD-NEXT: .LBB26_2: # %start +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -1369,45 +1357,42 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 265728 ; RV32I-NEXT: addi a3, a0, -64 -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 790016 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfsi@plt -; RV32I-NEXT: lui s5, 1048568 -; RV32I-NEXT: bltz s4, .LBB26_2 +; RV32I-NEXT: lui s4, 1048568 +; RV32I-NEXT: bltz s3, .LBB26_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: .LBB26_2: # %start -; RV32I-NEXT: bge s0, s3, .LBB26_4 +; RV32I-NEXT: blez s2, .LBB26_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 -; RV32I-NEXT: addi s5, a0, -1 +; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: .LBB26_4: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bnez a0, .LBB26_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s0, s5 -; RV32I-NEXT: .LBB26_6: # %start -; RV32I-NEXT: slli a0, s0, 16 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s4 +; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1415,7 +1400,6 @@ ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1452,12 +1436,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB26_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB26_6: # %start -; 
RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1539,29 +1521,26 @@ ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bltz s3, .LBB28_2 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgtz s3, .LBB28_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: slti a2, s0, 0 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a2, a2, a0 ; RV32I-NEXT: .LBB28_2: # %start -; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgtz s0, .LBB28_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: .LBB28_4: # %start -; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a0, a2, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1577,29 +1556,27 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunsdfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB28_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB28_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 8312 ; RV64I-NEXT: addiw a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 37 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: lui a1, 16 ; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgtz a0, .LBB28_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB28_4: # %start +; RV64I-NEXT: bgtz a0, .LBB28_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a2, a0, s1 +; RV64I-NEXT: .LBB28_2: # %start ; RV64I-NEXT: and a0, a2, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1648,9 +1625,6 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32IFD-LABEL: fcvt_w_s_sat_i8: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB30_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: lui a0, %hi(.LCPI30_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI30_0)(a0) ; RV32IFD-NEXT: lui a0, %hi(.LCPI30_1) @@ -1658,14 +1632,14 @@ ; RV32IFD-NEXT: fmax.d ft0, fa0, ft0 ; RV32IFD-NEXT: fmin.d ft0, ft0, ft1 ; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz -; RV32IFD-NEXT: .LBB30_2: # %start +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_w_s_sat_i8: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, 
.LBB30_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: lui a0, %hi(.LCPI30_0) ; RV64IFD-NEXT: fld ft0, %lo(.LCPI30_0)(a0) ; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1) @@ -1673,7 +1647,10 @@ ; RV64IFD-NEXT: fmax.d ft0, fa0, ft0 ; RV64IFD-NEXT: fmin.d ft0, ft0, ft1 ; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz -; RV64IFD-NEXT: .LBB30_2: # %start +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i8: @@ -1684,50 +1661,46 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a3, 263676 -; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 787968 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: li a1, -128 -; RV32I-NEXT: bltz s4, .LBB30_2 +; RV32I-NEXT: bltz s3, .LBB30_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB30_2: # %start -; RV32I-NEXT: li s4, 127 -; RV32I-NEXT: blt s0, s3, .LBB30_4 +; RV32I-NEXT: li s3, 127 +; RV32I-NEXT: bgtz s2, .LBB30_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, a1 +; RV32I-NEXT: mv s3, a1 ; RV32I-NEXT: .LBB30_4: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bnez a0, .LBB30_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s0, s4 -; RV32I-NEXT: .LBB30_6: # %start -; RV32I-NEXT: slli a0, s0, 24 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1763,12 +1736,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB30_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: .LBB30_6: # %start -; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1852,27 +1823,24 @@ ; RV32I-NEXT: lui a3, 263934 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: 
mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bltz s3, .LBB32_2 +; RV32I-NEXT: li a1, 255 +; RV32I-NEXT: bgtz s3, .LBB32_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: slti a1, s0, 0 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, a0 ; RV32I-NEXT: .LBB32_2: # %start -; RV32I-NEXT: li a0, 255 -; RV32I-NEXT: bgtz s0, .LBB32_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB32_4: # %start -; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: andi a0, a1, 255 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1888,26 +1856,24 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunsdfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB32_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB32_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 131967 ; RV64I-NEXT: slli a1, a0, 33 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: li a1, 255 -; RV64I-NEXT: bgtz a0, .LBB32_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB32_4: # %start +; RV64I-NEXT: bgtz a0, .LBB32_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: .LBB32_2: # %start ; RV64I-NEXT: andi a0, a1, 255 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1924,22 +1890,22 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind { ; RV32IFD-LABEL: fcvt_wu_d_sat_zext: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB33_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB33_2: # %start +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_d_sat_zext: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB33_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: slli a0, a0, 32 ; RV64IFD-NEXT: srli a0, a0, 32 -; RV64IFD-NEXT: .LBB33_2: # %start ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_d_sat_zext: @@ -1988,29 +1954,30 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunsdfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB33_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB33_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a0, 1055 ; RV64I-NEXT: slli a0, a0, 31 ; RV64I-NEXT: 
addi a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 21 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: blez a0, .LBB33_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB33_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB33_3 +; RV64I-NEXT: .LBB33_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s2, a0, 32 -; RV64I-NEXT: .LBB33_4: # %start -; RV64I-NEXT: slli a0, s2, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB33_3: # %start +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -2026,11 +1993,11 @@ define signext i32 @fcvt_w_d_sat_sext(double %a) nounwind { ; CHECKIFD-LABEL: fcvt_w_d_sat_sext: ; CHECKIFD: # %bb.0: # %start -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB34_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz -; CHECKIFD-NEXT: .LBB34_2: # %start +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret ; ; RV32I-LABEL: fcvt_w_d_sat_sext: @@ -2042,7 +2009,6 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 269824 @@ -2050,44 +2016,40 @@ ; RV32I-NEXT: lui a2, 1047552 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 794112 -; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: bltz s4, .LBB34_2 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: bltz s3, .LBB34_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: .LBB34_2: # %start -; RV32I-NEXT: bge s2, s3, .LBB34_4 +; RV32I-NEXT: blez s2, .LBB34_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: addi s5, a1, -1 +; RV32I-NEXT: addi s4, a1, -1 ; RV32I-NEXT: .LBB34_4: # %start ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bnez a0, .LBB34_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s5 -; RV32I-NEXT: .LBB34_6: # %start -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -2126,12 +2088,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB34_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB34_6: # %start -; RV64I-NEXT: sext.w a0, a1 +; RV64I-NEXT: snez a0, a0 +; 
RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -7,11 +7,11 @@ define signext i32 @test_floor_si32(double %x) { ; CHECKIFD-LABEL: test_floor_si32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB0_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rdn -; CHECKIFD-NEXT: .LBB0_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -44,18 +44,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB1_4: ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB1_6 -; RV32IFD-NEXT: # %bb.5: -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB1_7 -; RV32IFD-NEXT: .LBB1_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB1_7: +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -64,11 +62,11 @@ ; ; RV64IFD-LABEL: test_floor_si64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB1_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rdn -; RV64IFD-NEXT: .LBB1_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -78,11 +76,11 @@ define signext i32 @test_floor_ui32(double %x) { ; CHECKIFD-LABEL: test_floor_ui32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB2_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rdn -; CHECKIFD-NEXT: .LBB2_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -100,7 +98,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI3_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI3_0)(a2) @@ -108,9 +107,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw 
s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -119,11 +118,11 @@ ; ; RV64IFD-LABEL: test_floor_ui64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB3_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rdn -; RV64IFD-NEXT: .LBB3_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) @@ -133,11 +132,11 @@ define signext i32 @test_ceil_si32(double %x) { ; CHECKIFD-LABEL: test_ceil_si32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB4_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rup -; CHECKIFD-NEXT: .LBB4_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -170,18 +169,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB5_4: ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB5_6 -; RV32IFD-NEXT: # %bb.5: -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB5_7 -; RV32IFD-NEXT: .LBB5_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB5_7: +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -190,11 +187,11 @@ ; ; RV64IFD-LABEL: test_ceil_si64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB5_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rup -; RV64IFD-NEXT: .LBB5_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -204,11 +201,11 @@ define signext i32 @test_ceil_ui32(double %x) { ; CHECKIFD-LABEL: test_ceil_ui32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB6_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rup -; CHECKIFD-NEXT: .LBB6_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -226,7 +223,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI7_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI7_0)(a2) @@ -234,9 +232,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, 
a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -245,11 +243,11 @@ ; ; RV64IFD-LABEL: test_ceil_ui64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB7_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rup -; RV64IFD-NEXT: .LBB7_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) @@ -259,11 +257,11 @@ define signext i32 @test_trunc_si32(double %x) { ; CHECKIFD-LABEL: test_trunc_si32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB8_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rtz -; CHECKIFD-NEXT: .LBB8_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -296,18 +294,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB9_4: ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB9_6 -; RV32IFD-NEXT: # %bb.5: -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB9_7 -; RV32IFD-NEXT: .LBB9_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB9_7: +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -316,11 +312,11 @@ ; ; RV64IFD-LABEL: test_trunc_si64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB9_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB9_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -330,11 +326,11 @@ define signext i32 @test_trunc_ui32(double %x) { ; CHECKIFD-LABEL: test_trunc_ui32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB10_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rtz -; CHECKIFD-NEXT: .LBB10_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -352,7 +348,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI11_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a2) @@ -360,9 +357,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, 
-1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -371,11 +368,11 @@ ; ; RV64IFD-LABEL: test_trunc_ui64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB11_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB11_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) @@ -385,11 +382,11 @@ define signext i32 @test_round_si32(double %x) { ; CHECKIFD-LABEL: test_round_si32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB12_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rmm -; CHECKIFD-NEXT: .LBB12_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.round.f64(double %x) %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -422,18 +419,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB13_4: ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB13_6 -; RV32IFD-NEXT: # %bb.5: -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB13_7 -; RV32IFD-NEXT: .LBB13_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB13_7: +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -442,11 +437,11 @@ ; ; RV64IFD-LABEL: test_round_si64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB13_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rmm -; RV64IFD-NEXT: .LBB13_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.round.f64(double %x) %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -456,11 +451,11 @@ define signext i32 @test_round_ui32(double %x) { ; CHECKIFD-LABEL: test_round_ui32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB14_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rmm -; CHECKIFD-NEXT: .LBB14_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.round.f64(double %x) %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -478,7 +473,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI15_0) ; RV32IFD-NEXT: fld 
ft0, %lo(.LCPI15_0)(a2) @@ -486,9 +482,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -497,11 +493,11 @@ ; ; RV64IFD-LABEL: test_round_ui64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB15_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rmm -; RV64IFD-NEXT: .LBB15_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.round.f64(double %x) %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) @@ -511,11 +507,11 @@ define signext i32 @test_roundeven_si32(double %x) { ; CHECKIFD-LABEL: test_roundeven_si32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB16_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.w.d a0, fa0, rne -; CHECKIFD-NEXT: .LBB16_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.roundeven.f64(double %x) %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -548,18 +544,16 @@ ; RV32IFD-NEXT: addi a1, a3, -1 ; RV32IFD-NEXT: .LBB17_4: ; RV32IFD-NEXT: feq.d a3, fs0, fs0 -; RV32IFD-NEXT: bnez a3, .LBB17_6 -; RV32IFD-NEXT: # %bb.5: -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: j .LBB17_7 -; RV32IFD-NEXT: .LBB17_6: -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: seqz a4, s0 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: .LBB17_7: +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -568,11 +562,11 @@ ; ; RV64IFD-LABEL: test_roundeven_si64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB17_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rne -; RV64IFD-NEXT: .LBB17_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.roundeven.f64(double %x) %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -582,11 +576,11 @@ define signext i32 @test_roundeven_ui32(double %x) { ; CHECKIFD-LABEL: test_roundeven_ui32: ; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: feq.d a0, fa0, fa0 -; CHECKIFD-NEXT: beqz a0, .LBB18_2 -; CHECKIFD-NEXT: # %bb.1: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rne -; CHECKIFD-NEXT: .LBB18_2: +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 ; CHECKIFD-NEXT: ret %a = call double @llvm.roundeven.f64(double %x) %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -604,7 +598,8 @@ ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: neg 
s0, a0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi s0, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI19_0) ; RV32IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a2) @@ -612,9 +607,9 @@ ; RV32IFD-NEXT: flt.d a2, ft0, fs0 ; RV32IFD-NEXT: seqz a2, a2 ; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a1, a2 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -623,11 +618,11 @@ ; ; RV64IFD-LABEL: test_roundeven_ui64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB19_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rne -; RV64IFD-NEXT: .LBB19_2: +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %a = call double @llvm.roundeven.f64(double %x) %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -38,11 +38,11 @@ define i32 @fcvt_w_s_sat(float %a) nounwind { ; CHECKIF-LABEL: fcvt_w_s_sat: ; CHECKIF: # %bb.0: # %start -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB1_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz -; CHECKIF-NEXT: .LBB1_2: # %start +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat: @@ -76,12 +76,9 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bnez a1, .LBB1_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: .LBB1_6: # %start +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -121,12 +118,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB1_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: .LBB1_6: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -218,22 +212,22 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32IF-LABEL: fcvt_wu_s_sat: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: feq.s a0, fa0, fa0 -; RV32IF-NEXT: beqz a0, .LBB4_2 -; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32IF-NEXT: .LBB4_2: # %start +; RV32IF-NEXT: feq.s a1, fa0, fa0 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB4_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and 
a0, a0, a1 ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 -; RV64IF-NEXT: .LBB4_2: # %start ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_s_sat: @@ -275,27 +269,27 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: bltz s2, .LBB4_2 -; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: .LBB4_2: # %start ; RV64I-NEXT: lui a0, 325632 ; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: blez a0, .LBB4_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB4_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB4_3 +; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: .LBB4_4: # %start -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB4_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -555,18 +549,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB12_4: # %start ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB12_6 -; RV32IF-NEXT: # %bb.5: # %start -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB12_7 -; RV32IF-NEXT: .LBB12_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB12_7: # %start +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -575,11 +567,11 @@ ; ; RV64IF-LABEL: fcvt_l_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB12_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz -; RV64IF-NEXT: .LBB12_2: # %start +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_l_s_sat: @@ -592,60 +584,54 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, 389120 -; RV32I-NEXT: addi s2, a0, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s5, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __fixsfdi@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv s4, a1 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 913408 -; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB12_2 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixsfdi@plt +; 
RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __unordsf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: lui a0, 389120 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: bgtz a0, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s5, s5, s3 +; RV32I-NEXT: slti a0, s4, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and s1, s1, a0 ; RV32I-NEXT: .LBB12_2: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv s3, s0 -; RV32I-NEXT: bnez a0, .LBB12_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s3, s5 -; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: lui a1, 913408 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: lui s6, 524288 ; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: bltz a0, .LBB12_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s4 -; RV32I-NEXT: .LBB12_6: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: bltz a0, .LBB12_4 +; RV32I-NEXT: # %bb.3: # %start +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: .LBB12_4: # %start +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s0, a0, .LBB12_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: addi s5, s6, -1 -; RV32I-NEXT: .LBB12_8: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bnez a0, .LBB12_10 -; RV32I-NEXT: # %bb.9: # %start -; RV32I-NEXT: mv s0, s5 -; RV32I-NEXT: .LBB12_10: # %start -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: blez a0, .LBB12_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: addi s4, s5, -1 +; RV32I-NEXT: .LBB12_6: # %start +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __unordsf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s4 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -653,7 +639,6 @@ ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -690,12 +675,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB12_7 -; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: .LBB12_7: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -755,7 +737,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI14_0)(a2) @@ -763,9 +746,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; 
RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -774,11 +757,11 @@ ; ; RV64IF-LABEL: fcvt_lu_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB14_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz -; RV64IF-NEXT: .LBB14_2: # %start +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_lu_s_sat: @@ -1198,9 +1181,6 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32IF-LABEL: fcvt_w_s_sat_i16: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: feq.s a0, fa0, fa0 -; RV32IF-NEXT: beqz a0, .LBB24_2 -; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: lui a0, %hi(.LCPI24_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI24_0)(a0) ; RV32IF-NEXT: lui a0, %hi(.LCPI24_1) @@ -1208,14 +1188,14 @@ ; RV32IF-NEXT: fmax.s ft0, fa0, ft0 ; RV32IF-NEXT: fmin.s ft0, ft0, ft1 ; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IF-NEXT: .LBB24_2: # %start +; RV32IF-NEXT: feq.s a1, fa0, fa0 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_w_s_sat_i16: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB24_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: lui a0, %hi(.LCPI24_0) ; RV64IF-NEXT: flw ft0, %lo(.LCPI24_0)(a0) ; RV64IF-NEXT: lui a0, %hi(.LCPI24_1) @@ -1223,7 +1203,10 @@ ; RV64IF-NEXT: fmax.s ft0, fa0, ft0 ; RV64IF-NEXT: fmin.s ft0, ft0, ft1 ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IF-NEXT: .LBB24_2: # %start +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -1256,12 +1239,10 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a0, .LBB24_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: .LBB24_6: # %start -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1300,12 +1281,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB24_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB24_6: # %start -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1379,28 +1358,26 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bltz s1, .LBB26_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: 
.LBB26_2: # %start +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 292864 ; RV32I-NEXT: addi a1, a0, -256 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: lui a1, 16 ; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgtz a0, .LBB26_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB26_4: # %start +; RV32I-NEXT: bgtz a0, .LBB26_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: slti a0, s0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a2, a0, s1 +; RV32I-NEXT: .LBB26_2: # %start ; RV32I-NEXT: and a0, a2, a1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1416,28 +1393,26 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB26_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB26_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 292864 ; RV64I-NEXT: addiw a1, a0, -256 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: lui a1, 16 ; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgtz a0, .LBB26_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB26_4: # %start +; RV64I-NEXT: bgtz a0, .LBB26_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a2, a0, s1 +; RV64I-NEXT: .LBB26_2: # %start ; RV64I-NEXT: and a0, a2, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1486,9 +1461,6 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind { ; RV32IF-LABEL: fcvt_w_s_sat_i8: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: feq.s a0, fa0, fa0 -; RV32IF-NEXT: beqz a0, .LBB28_2 -; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: lui a0, %hi(.LCPI28_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI28_0)(a0) ; RV32IF-NEXT: lui a0, %hi(.LCPI28_1) @@ -1496,14 +1468,14 @@ ; RV32IF-NEXT: fmax.s ft0, fa0, ft0 ; RV32IF-NEXT: fmin.s ft0, ft0, ft1 ; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IF-NEXT: .LBB28_2: # %start +; RV32IF-NEXT: feq.s a1, fa0, fa0 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_w_s_sat_i8: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB28_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: lui a0, %hi(.LCPI28_0) ; RV64IF-NEXT: flw ft0, %lo(.LCPI28_0)(a0) ; RV64IF-NEXT: lui a0, %hi(.LCPI28_1) @@ -1511,7 +1483,10 @@ ; RV64IF-NEXT: fmax.s ft0, fa0, ft0 ; RV64IF-NEXT: fmin.s ft0, ft0, ft1 ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IF-NEXT: .LBB28_2: # %start +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i8: @@ -1543,12 +1518,10 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a0, .LBB28_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: .LBB28_6: # %start -; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: 
snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1586,12 +1559,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB28_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: .LBB28_6: # %start -; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1665,25 +1636,23 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bltz s1, .LBB30_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: .LBB30_2: # %start +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a1, 276464 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: li a1, 255 -; RV32I-NEXT: bgtz a0, .LBB30_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: .LBB30_4: # %start +; RV32I-NEXT: bgtz a0, .LBB30_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: slti a0, s0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: .LBB30_2: # %start ; RV32I-NEXT: andi a0, a1, 255 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1699,25 +1668,23 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB30_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB30_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a1, 276464 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: li a1, 255 -; RV64I-NEXT: bgtz a0, .LBB30_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB30_4: # %start +; RV64I-NEXT: bgtz a0, .LBB30_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: .LBB30_2: # %start ; RV64I-NEXT: andi a0, a1, 255 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1734,22 +1701,22 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32IF-LABEL: fcvt_wu_s_sat_zext: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: feq.s a0, fa0, fa0 -; RV32IF-NEXT: beqz a0, .LBB31_2 -; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32IF-NEXT: .LBB31_2: # %start +; RV32IF-NEXT: feq.s a1, fa0, fa0 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 ; 
RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_sat_zext: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB31_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 -; RV64IF-NEXT: .LBB31_2: # %start ; RV64IF-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_s_sat_zext: @@ -1791,27 +1758,28 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB31_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB31_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 325632 ; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: blez a0, .LBB31_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB31_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB31_3 +; RV64I-NEXT: .LBB31_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s2, a0, 32 -; RV64I-NEXT: .LBB31_4: # %start -; RV64I-NEXT: slli a0, s2, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB31_3: # %start +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1827,11 +1795,11 @@ define signext i32 @fcvt_w_s_sat_sext(float %a) nounwind { ; CHECKIF-LABEL: fcvt_w_s_sat_sext: ; CHECKIF: # %bb.0: # %start -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB32_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz -; CHECKIF-NEXT: .LBB32_2: # %start +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_sext: @@ -1865,12 +1833,9 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bnez a1, .LBB32_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: .LBB32_6: # %start +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1910,12 +1875,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB32_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB32_6: # %start -; RV64I-NEXT: sext.w a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll --- 
a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -7,11 +7,11 @@ define signext i32 @test_floor_si32(float %x) { ; CHECKIF-LABEL: test_floor_si32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB0_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rdn -; CHECKIF-NEXT: .LBB0_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.floor.f32(float %x) %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) @@ -44,18 +44,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB1_4: ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB1_6 -; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB1_7 -; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB1_7: +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -64,11 +62,11 @@ ; ; RV64IF-LABEL: test_floor_si64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB1_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rdn -; RV64IF-NEXT: .LBB1_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.floor.f32(float %x) %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -78,11 +76,11 @@ define signext i32 @test_floor_ui32(float %x) { ; CHECKIF-LABEL: test_floor_ui32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB2_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rdn -; CHECKIF-NEXT: .LBB2_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.floor.f32(float %x) %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -100,7 +98,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a2) @@ -108,9 +107,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -119,11 +118,11 @@ ; ; RV64IF-LABEL: test_floor_ui64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB3_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rdn -; RV64IF-NEXT: .LBB3_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; 
RV64IF-NEXT: ret %a = call float @llvm.floor.f32(float %x) %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) @@ -133,11 +132,11 @@ define signext i32 @test_ceil_si32(float %x) { ; CHECKIF-LABEL: test_ceil_si32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB4_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rup -; CHECKIF-NEXT: .LBB4_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) @@ -170,18 +169,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB5_4: ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB5_6 -; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB5_7 -; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB5_7: +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -190,11 +187,11 @@ ; ; RV64IF-LABEL: test_ceil_si64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB5_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rup -; RV64IF-NEXT: .LBB5_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -204,11 +201,11 @@ define signext i32 @test_ceil_ui32(float %x) { ; CHECKIF-LABEL: test_ceil_ui32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB6_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rup -; CHECKIF-NEXT: .LBB6_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -226,7 +223,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a2) @@ -234,9 +232,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -245,11 +243,11 @@ ; ; RV64IF-LABEL: test_ceil_ui64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB7_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rup -; RV64IF-NEXT: .LBB7_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; 
RV64IF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) @@ -259,11 +257,11 @@ define signext i32 @test_trunc_si32(float %x) { ; CHECKIF-LABEL: test_trunc_si32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB8_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz -; CHECKIF-NEXT: .LBB8_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) @@ -296,18 +294,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB9_4: ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB9_6 -; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB9_7 -; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB9_7: +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -316,11 +312,11 @@ ; ; RV64IF-LABEL: test_trunc_si64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB9_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz -; RV64IF-NEXT: .LBB9_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -330,11 +326,11 @@ define signext i32 @test_trunc_ui32(float %x) { ; CHECKIF-LABEL: test_trunc_ui32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB10_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rtz -; CHECKIF-NEXT: .LBB10_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -352,7 +348,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a2) @@ -360,9 +357,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -371,11 +368,11 @@ ; ; RV64IF-LABEL: test_trunc_ui64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB11_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz -; RV64IF-NEXT: .LBB11_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: 
and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) @@ -385,11 +382,11 @@ define signext i32 @test_round_si32(float %x) { ; CHECKIF-LABEL: test_round_si32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB12_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rmm -; CHECKIF-NEXT: .LBB12_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.round.f32(float %x) %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) @@ -422,18 +419,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB13_4: ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB13_6 -; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB13_7 -; RV32IF-NEXT: .LBB13_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB13_7: +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -442,11 +437,11 @@ ; ; RV64IF-LABEL: test_round_si64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB13_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rmm -; RV64IF-NEXT: .LBB13_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.round.f32(float %x) %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -456,11 +451,11 @@ define signext i32 @test_round_ui32(float %x) { ; CHECKIF-LABEL: test_round_ui32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB14_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rmm -; CHECKIF-NEXT: .LBB14_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.round.f32(float %x) %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -478,7 +473,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a2) @@ -486,9 +482,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -497,11 +493,11 @@ ; ; RV64IF-LABEL: test_round_ui64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB15_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rmm -; RV64IF-NEXT: .LBB15_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi 
a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.round.f32(float %x) %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) @@ -511,11 +507,11 @@ define signext i32 @test_roundeven_si32(float %x) { ; CHECKIF-LABEL: test_roundeven_si32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB16_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rne -; CHECKIF-NEXT: .LBB16_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.roundeven.f32(float %x) %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) @@ -548,18 +544,16 @@ ; RV32IF-NEXT: addi a1, a3, -1 ; RV32IF-NEXT: .LBB17_4: ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: bnez a3, .LBB17_6 -; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: j .LBB17_7 -; RV32IF-NEXT: .LBB17_6: -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: seqz a4, s0 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: .LBB17_7: +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -568,11 +562,11 @@ ; ; RV64IF-LABEL: test_roundeven_si64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB17_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rne -; RV64IF-NEXT: .LBB17_2: +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.roundeven.f32(float %x) %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -582,11 +576,11 @@ define signext i32 @test_roundeven_ui32(float %x) { ; CHECKIF-LABEL: test_roundeven_ui32: ; CHECKIF: # %bb.0: -; CHECKIF-NEXT: feq.s a0, fa0, fa0 -; CHECKIF-NEXT: beqz a0, .LBB18_2 -; CHECKIF-NEXT: # %bb.1: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rne -; CHECKIF-NEXT: .LBB18_2: +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 ; CHECKIF-NEXT: ret %a = call float @llvm.roundeven.f32(float %x) %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -604,7 +598,8 @@ ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi s0, a0, -1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI19_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a2) @@ -612,9 +607,9 @@ ; RV32IF-NEXT: flt.s a2, ft0, fs0 ; RV32IF-NEXT: seqz a2, a2 ; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -623,11 +618,11 @@ ; ; RV64IF-LABEL: test_roundeven_ui64: ; RV64IF: # %bb.0: -; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: beqz a0, .LBB19_2 -; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rne -; RV64IF-NEXT: .LBB19_2: +; RV64IF-NEXT: feq.s 
a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %a = call float @llvm.roundeven.f32(float %x) %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -2504,6 +2504,9 @@ ; RV32-NEXT: j .LBB49_2 ; RV32-NEXT: .LBB49_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp @@ -2513,28 +2516,26 @@ ; RV32-NEXT: call __atomic_compare_exchange_8@plt ; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) -; RV32-NEXT: bnez a0, .LBB49_7 +; RV32-NEXT: bnez a0, .LBB49_6 ; RV32-NEXT: .LBB49_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: beqz a1, .LBB49_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV32-NEXT: sgtz a0, a1 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bnez a0, .LBB49_1 ; RV32-NEXT: j .LBB49_5 ; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 ; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: .LBB49_5: # %atomicrmw.start -; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bnez a0, .LBB49_1 -; RV32-NEXT: # %bb.6: # %atomicrmw.start +; RV32-NEXT: .LBB49_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB49_1 -; RV32-NEXT: .LBB49_7: # %atomicrmw.end +; RV32-NEXT: .LBB49_6: # %atomicrmw.end ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2599,6 +2600,9 @@ ; RV32-NEXT: j .LBB50_2 ; RV32-NEXT: .LBB50_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp @@ -2608,27 +2612,25 @@ ; RV32-NEXT: call __atomic_compare_exchange_8@plt ; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) -; RV32-NEXT: bnez a0, .LBB50_7 +; RV32-NEXT: bnez a0, .LBB50_6 ; RV32-NEXT: .LBB50_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: beqz a1, .LBB50_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV32-NEXT: slti a0, a1, 0 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bnez a0, .LBB50_1 ; RV32-NEXT: j .LBB50_5 ; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: .LBB50_5: # %atomicrmw.start -; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bnez a0, .LBB50_1 -; RV32-NEXT: # %bb.6: # %atomicrmw.start +; RV32-NEXT: .LBB50_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB50_1 -; RV32-NEXT: .LBB50_7: # %atomicrmw.end +; RV32-NEXT: .LBB50_6: # %atomicrmw.end ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2696,6 +2698,9 @@ ; RV32-NEXT: j .LBB51_2 ; RV32-NEXT: .LBB51_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; 
RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp @@ -2705,28 +2710,26 @@ ; RV32-NEXT: call __atomic_compare_exchange_8@plt ; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) -; RV32-NEXT: bnez a0, .LBB51_7 +; RV32-NEXT: bnez a0, .LBB51_6 ; RV32-NEXT: .LBB51_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: beqz a1, .LBB51_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: snez a0, a1 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bnez a0, .LBB51_1 ; RV32-NEXT: j .LBB51_5 ; RV32-NEXT: .LBB51_4: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 ; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: .LBB51_5: # %atomicrmw.start -; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bnez a0, .LBB51_1 -; RV32-NEXT: # %bb.6: # %atomicrmw.start +; RV32-NEXT: .LBB51_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB51_1 -; RV32-NEXT: .LBB51_7: # %atomicrmw.end +; RV32-NEXT: .LBB51_6: # %atomicrmw.end ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2791,6 +2794,9 @@ ; RV32-NEXT: j .LBB52_2 ; RV32-NEXT: .LBB52_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp @@ -2800,27 +2806,20 @@ ; RV32-NEXT: call __atomic_compare_exchange_8@plt ; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) -; RV32-NEXT: bnez a0, .LBB52_7 +; RV32-NEXT: bnez a0, .LBB52_4 ; RV32-NEXT: .LBB52_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beqz a1, .LBB52_4 -; RV32-NEXT: # %bb.3: # %atomicrmw.start -; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB52_5 -; RV32-NEXT: .LBB52_4: # in Loop: Header=BB52_2 Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: .LBB52_5: # %atomicrmw.start -; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 +; RV32-NEXT: snez a2, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bnez a0, .LBB52_1 -; RV32-NEXT: # %bb.6: # %atomicrmw.start +; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB52_1 -; RV32-NEXT: .LBB52_7: # %atomicrmw.end +; RV32-NEXT: .LBB52_4: # %atomicrmw.end ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -23,28 +23,30 @@ ; RV32IF-NEXT: beqz a1, .LBB0_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: slti a4, a1, 0 -; RV32IF-NEXT: beqz a4, .LBB0_3 -; RV32IF-NEXT: j .LBB0_4 +; RV32IF-NEXT: j .LBB0_3 ; RV32IF-NEXT: .LBB0_2: ; RV32IF-NEXT: sltu a4, a0, a3 -; RV32IF-NEXT: bnez a4, .LBB0_4 ; RV32IF-NEXT: .LBB0_3: # %entry -; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: seqz a5, a4 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a1, a5, a1 +; RV32IF-NEXT: bnez a4, .LBB0_5 +; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: .LBB0_4: # %entry +; RV32IF-NEXT: .LBB0_5: # 
%entry ; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a1, a3, .LBB0_6 -; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: beq a1, a3, .LBB0_7 +; RV32IF-NEXT: # %bb.6: # %entry ; RV32IF-NEXT: slti a1, a1, 0 ; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: beqz a1, .LBB0_7 -; RV32IF-NEXT: j .LBB0_8 -; RV32IF-NEXT: .LBB0_6: +; RV32IF-NEXT: beqz a1, .LBB0_8 +; RV32IF-NEXT: j .LBB0_9 +; RV32IF-NEXT: .LBB0_7: ; RV32IF-NEXT: sltu a1, a2, a0 -; RV32IF-NEXT: bnez a1, .LBB0_8 -; RV32IF-NEXT: .LBB0_7: # %entry -; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: bnez a1, .LBB0_9 ; RV32IF-NEXT: .LBB0_8: # %entry +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: .LBB0_9: # %entry ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret @@ -72,11 +74,11 @@ ; ; RV32IFD-LABEL: stest_f64i32: ; RV32IFD: # %bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB0_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB0_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: stest_f64i32: @@ -113,15 +115,11 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixunsdfdi@plt -; RV32IF-NEXT: beqz a1, .LBB1_2 -; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: j .LBB1_3 -; RV32IF-NEXT: .LBB1_2: -; RV32IF-NEXT: sltiu a1, a0, -1 -; RV32IF-NEXT: .LBB1_3: # %entry +; RV32IF-NEXT: sltiu a2, a0, -1 ; RV32IF-NEXT: snez a1, a1 ; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a1, a1, a2 +; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -146,11 +144,11 @@ ; ; RV32IFD-LABEL: utest_f64i32: ; RV32IFD: # %bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB1_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB1_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: utest_f64i32: @@ -186,24 +184,21 @@ ; RV32IF-NEXT: .LBB2_2: ; RV32IF-NEXT: sltiu a2, a0, -1 ; RV32IF-NEXT: .LBB2_3: # %entry -; RV32IF-NEXT: snez a3, a2 -; RV32IF-NEXT: addi a3, a3, -1 -; RV32IF-NEXT: bnez a2, .LBB2_5 -; RV32IF-NEXT: # %bb.4: # %entry -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: .LBB2_5: # %entry +; RV32IF-NEXT: snez a2, a2 +; RV32IF-NEXT: addi a3, a2, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: or a0, a3, a0 -; RV32IF-NEXT: beqz a1, .LBB2_7 -; RV32IF-NEXT: # %bb.6: # %entry +; RV32IF-NEXT: beqz a1, .LBB2_5 +; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: sgtz a1, a1 -; RV32IF-NEXT: beqz a1, .LBB2_8 -; RV32IF-NEXT: j .LBB2_9 -; RV32IF-NEXT: .LBB2_7: +; RV32IF-NEXT: j .LBB2_6 +; RV32IF-NEXT: .LBB2_5: ; RV32IF-NEXT: snez a1, a0 -; RV32IF-NEXT: bnez a1, .LBB2_9 -; RV32IF-NEXT: .LBB2_8: # %entry -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: .LBB2_9: # %entry +; RV32IF-NEXT: .LBB2_6: # %entry +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret @@ -221,21 +216,20 @@ ; RV64IF-NEXT: # %bb.1: # %entry ; RV64IF-NEXT: mv a0, a1 ; RV64IF-NEXT: .LBB2_2: # %entry -; RV64IF-NEXT: bgtz a0, .LBB2_4 -; RV64IF-NEXT: # %bb.3: # %entry 
-; RV64IF-NEXT: li a0, 0 -; RV64IF-NEXT: .LBB2_4: # %entry +; RV64IF-NEXT: sgtz a1, a0 +; RV64IF-NEXT: neg a1, a1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; ; RV32IFD-LABEL: ustest_f64i32: ; RV32IFD: # %bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB2_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB2_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: ustest_f64i32: @@ -243,16 +237,13 @@ ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz ; RV64IFD-NEXT: li a1, -1 ; RV64IFD-NEXT: srli a1, a1, 32 -; RV64IFD-NEXT: bge a0, a1, .LBB2_3 +; RV64IFD-NEXT: blt a0, a1, .LBB2_2 ; RV64IFD-NEXT: # %bb.1: # %entry -; RV64IFD-NEXT: blez a0, .LBB2_4 -; RV64IFD-NEXT: .LBB2_2: # %entry -; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB2_3: # %entry ; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: bgtz a0, .LBB2_2 -; RV64IFD-NEXT: .LBB2_4: # %entry -; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: .LBB2_2: # %entry +; RV64IFD-NEXT: sgtz a1, a0 +; RV64IFD-NEXT: neg a1, a1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i64 @@ -267,11 +258,11 @@ define i32 @stest_f32i32(float %x) { ; RV32-LABEL: stest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB3_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-NEXT: .LBB3_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: stest_f32i32: @@ -303,11 +294,11 @@ define i32 @utest_f32i32(float %x) { ; RV32-LABEL: utest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB4_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32-NEXT: .LBB4_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: utest_f32i32: @@ -331,11 +322,11 @@ define i32 @ustest_f32i32(float %x) { ; RV32-LABEL: ustest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB5_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32-NEXT: .LBB5_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: ustest_f32i32: @@ -343,16 +334,13 @@ ; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: li a1, -1 ; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: bge a0, a1, .LBB5_3 +; RV64-NEXT: blt a0, a1, .LBB5_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: blez a0, .LBB5_4 -; RV64-NEXT: .LBB5_2: # %entry -; RV64-NEXT: ret -; RV64-NEXT: .LBB5_3: # %entry ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgtz a0, .LBB5_2 -; RV64-NEXT: .LBB5_4: # %entry -; RV64-NEXT: li a0, 0 +; RV64-NEXT: .LBB5_2: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i64 @@ -379,28 +367,30 @@ ; RV32-NEXT: beqz a1, .LBB6_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: slti a4, a1, 0 -; RV32-NEXT: beqz a4, .LBB6_3 -; RV32-NEXT: j .LBB6_4 +; RV32-NEXT: j .LBB6_3 ; RV32-NEXT: .LBB6_2: ; RV32-NEXT: sltu a4, a0, a3 -; RV32-NEXT: bnez a4, .LBB6_4 ; RV32-NEXT: .LBB6_3: # %entry -; RV32-NEXT: li a1, 0 +; 
RV32-NEXT: seqz a5, a4 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a1, a5, a1 +; RV32-NEXT: bnez a4, .LBB6_5 +; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB6_4: # %entry +; RV32-NEXT: .LBB6_5: # %entry ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a1, a3, .LBB6_6 -; RV32-NEXT: # %bb.5: # %entry +; RV32-NEXT: beq a1, a3, .LBB6_7 +; RV32-NEXT: # %bb.6: # %entry ; RV32-NEXT: slti a1, a1, 0 ; RV32-NEXT: xori a1, a1, 1 -; RV32-NEXT: beqz a1, .LBB6_7 -; RV32-NEXT: j .LBB6_8 -; RV32-NEXT: .LBB6_6: +; RV32-NEXT: beqz a1, .LBB6_8 +; RV32-NEXT: j .LBB6_9 +; RV32-NEXT: .LBB6_7: ; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: bnez a1, .LBB6_8 -; RV32-NEXT: .LBB6_7: # %entry -; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: bnez a1, .LBB6_9 ; RV32-NEXT: .LBB6_8: # %entry +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: .LBB6_9: # %entry ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -447,15 +437,11 @@ ; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: beqz a1, .LBB7_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: j .LBB7_3 -; RV32-NEXT: .LBB7_2: -; RV32-NEXT: sltiu a1, a0, -1 -; RV32-NEXT: .LBB7_3: # %entry +; RV32-NEXT: sltiu a2, a0, -1 ; RV32-NEXT: snez a1, a1 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -504,24 +490,21 @@ ; RV32-NEXT: .LBB8_2: ; RV32-NEXT: sltiu a2, a0, -1 ; RV32-NEXT: .LBB8_3: # %entry -; RV32-NEXT: snez a3, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: bnez a2, .LBB8_5 -; RV32-NEXT: # %bb.4: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB8_5: # %entry +; RV32-NEXT: snez a2, a2 +; RV32-NEXT: addi a3, a2, -1 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a3, a0 -; RV32-NEXT: beqz a1, .LBB8_7 -; RV32-NEXT: # %bb.6: # %entry +; RV32-NEXT: beqz a1, .LBB8_5 +; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: sgtz a1, a1 -; RV32-NEXT: beqz a1, .LBB8_8 -; RV32-NEXT: j .LBB8_9 -; RV32-NEXT: .LBB8_7: +; RV32-NEXT: j .LBB8_6 +; RV32-NEXT: .LBB8_5: ; RV32-NEXT: snez a1, a0 -; RV32-NEXT: bnez a1, .LBB8_9 -; RV32-NEXT: .LBB8_8: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB8_9: # %entry +; RV32-NEXT: .LBB8_6: # %entry +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -541,10 +524,9 @@ ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB8_2: # %entry -; RV64-NEXT: bgtz a0, .LBB8_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB8_4: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -730,10 +712,9 @@ ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: mv a0, a1 ; RV32IF-NEXT: .LBB11_2: # %entry -; RV32IF-NEXT: bgtz a0, .LBB11_4 -; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: .LBB11_4: # %entry +; RV32IF-NEXT: sgtz a1, a0 +; RV32IF-NEXT: neg a1, a1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret @@ -751,10 +732,9 @@ ; RV64IF-NEXT: # %bb.1: # %entry ; RV64IF-NEXT: mv a0, a1 ; RV64IF-NEXT: .LBB11_2: # %entry -; RV64IF-NEXT: bgtz a0, .LBB11_4 -; RV64IF-NEXT: # %bb.3: 
# %entry -; RV64IF-NEXT: li a0, 0 -; RV64IF-NEXT: .LBB11_4: # %entry +; RV64IF-NEXT: sgtz a1, a0 +; RV64IF-NEXT: neg a1, a1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret @@ -764,16 +744,13 @@ ; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz ; RV32IFD-NEXT: lui a1, 16 ; RV32IFD-NEXT: addi a1, a1, -1 -; RV32IFD-NEXT: bge a0, a1, .LBB11_3 +; RV32IFD-NEXT: blt a0, a1, .LBB11_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: blez a0, .LBB11_4 -; RV32IFD-NEXT: .LBB11_2: # %entry -; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB11_3: # %entry ; RV32IFD-NEXT: mv a0, a1 -; RV32IFD-NEXT: bgtz a0, .LBB11_2 -; RV32IFD-NEXT: .LBB11_4: # %entry -; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: .LBB11_2: # %entry +; RV32IFD-NEXT: sgtz a1, a0 +; RV32IFD-NEXT: neg a1, a1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: ustest_f64i16: @@ -781,16 +758,13 @@ ; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz ; RV64IFD-NEXT: lui a1, 16 ; RV64IFD-NEXT: addiw a1, a1, -1 -; RV64IFD-NEXT: bge a0, a1, .LBB11_3 +; RV64IFD-NEXT: blt a0, a1, .LBB11_2 ; RV64IFD-NEXT: # %bb.1: # %entry -; RV64IFD-NEXT: blez a0, .LBB11_4 -; RV64IFD-NEXT: .LBB11_2: # %entry -; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB11_3: # %entry ; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: bgtz a0, .LBB11_2 -; RV64IFD-NEXT: .LBB11_4: # %entry -; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: .LBB11_2: # %entry +; RV64IFD-NEXT: sgtz a1, a0 +; RV64IFD-NEXT: neg a1, a1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i32 @@ -886,16 +860,13 @@ ; RV32-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: bge a0, a1, .LBB14_3 +; RV32-NEXT: blt a0, a1, .LBB14_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: blez a0, .LBB14_4 -; RV32-NEXT: .LBB14_2: # %entry -; RV32-NEXT: ret -; RV32-NEXT: .LBB14_3: # %entry ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bgtz a0, .LBB14_2 -; RV32-NEXT: .LBB14_4: # %entry -; RV32-NEXT: li a0, 0 +; RV32-NEXT: .LBB14_2: # %entry +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: ustest_f32i16: @@ -903,16 +874,13 @@ ; RV64-NEXT: fcvt.w.s a0, fa0, rtz ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: bge a0, a1, .LBB14_3 +; RV64-NEXT: blt a0, a1, .LBB14_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: blez a0, .LBB14_4 -; RV64-NEXT: .LBB14_2: # %entry -; RV64-NEXT: ret -; RV64-NEXT: .LBB14_3: # %entry ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgtz a0, .LBB14_2 -; RV64-NEXT: .LBB14_4: # %entry -; RV64-NEXT: li a0, 0 +; RV64-NEXT: .LBB14_2: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i32 @@ -1044,10 +1012,9 @@ ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB17_2: # %entry -; RV32-NEXT: bgtz a0, .LBB17_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB17_4: # %entry +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1067,10 +1034,9 @@ ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB17_2: # %entry -; RV64-NEXT: bgtz a0, .LBB17_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB17_4: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; 
RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1097,53 +1063,56 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a2, 20(sp) -; RV32IF-NEXT: lw a3, 16(sp) +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a2, 16(sp) ; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: addi a5, a4, -1 +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: addi a5, a3, -1 ; RV32IF-NEXT: beq a1, a5, .LBB18_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a3, a2 +; RV32IF-NEXT: or a7, a2, a0 ; RV32IF-NEXT: bnez a7, .LBB18_3 ; RV32IF-NEXT: j .LBB18_4 ; RV32IF-NEXT: .LBB18_2: -; RV32IF-NEXT: sltiu a6, a0, -1 -; RV32IF-NEXT: or a7, a3, a2 +; RV32IF-NEXT: sltiu a6, a4, -1 +; RV32IF-NEXT: or a7, a2, a0 ; RV32IF-NEXT: beqz a7, .LBB18_4 ; RV32IF-NEXT: .LBB18_3: # %entry -; RV32IF-NEXT: slti a6, a2, 0 +; RV32IF-NEXT: slti a6, a0, 0 ; RV32IF-NEXT: .LBB18_4: # %entry -; RV32IF-NEXT: snez a7, a6 -; RV32IF-NEXT: addi a7, a7, -1 +; RV32IF-NEXT: seqz t0, a6 +; RV32IF-NEXT: addi a7, t0, -1 +; RV32IF-NEXT: neg t0, t0 ; RV32IF-NEXT: bnez a6, .LBB18_6 ; RV32IF-NEXT: # %bb.5: # %entry -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: li a3, 0 ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB18_6: # %entry -; RV32IF-NEXT: or a0, a7, a0 -; RV32IF-NEXT: beq a1, a4, .LBB18_8 +; RV32IF-NEXT: or a4, t0, a4 +; RV32IF-NEXT: and a5, a7, a0 +; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: beq a1, a3, .LBB18_8 ; RV32IF-NEXT: # %bb.7: # %entry -; RV32IF-NEXT: sltu a4, a4, a1 +; RV32IF-NEXT: sltu a0, a3, a1 ; RV32IF-NEXT: j .LBB18_9 ; RV32IF-NEXT: .LBB18_8: -; RV32IF-NEXT: snez a4, a0 +; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB18_9: # %entry -; RV32IF-NEXT: and a3, a3, a2 -; RV32IF-NEXT: li a5, -1 -; RV32IF-NEXT: beq a3, a5, .LBB18_11 +; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: li a3, -1 +; RV32IF-NEXT: beq a2, a3, .LBB18_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a2, a2, 0 -; RV32IF-NEXT: xori a4, a2, 1 +; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB18_11: # %entry -; RV32IF-NEXT: bnez a4, .LBB18_13 +; RV32IF-NEXT: bnez a0, .LBB18_13 ; RV32IF-NEXT: # %bb.12: # %entry -; RV32IF-NEXT: li a0, 0 ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB18_13: # %entry +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, a0, a4 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1160,28 +1129,30 @@ ; RV64IF-NEXT: beqz a1, .LBB18_2 ; RV64IF-NEXT: # %bb.1: # %entry ; RV64IF-NEXT: slti a4, a1, 0 -; RV64IF-NEXT: beqz a4, .LBB18_3 -; RV64IF-NEXT: j .LBB18_4 +; RV64IF-NEXT: j .LBB18_3 ; RV64IF-NEXT: .LBB18_2: ; RV64IF-NEXT: sltu a4, a0, a3 -; RV64IF-NEXT: bnez a4, .LBB18_4 ; RV64IF-NEXT: .LBB18_3: # %entry -; RV64IF-NEXT: li a1, 0 -; RV64IF-NEXT: mv a0, a3 -; RV64IF-NEXT: .LBB18_4: # %entry -; RV64IF-NEXT: slli a3, a2, 63 -; RV64IF-NEXT: beq a1, a2, .LBB18_6 -; RV64IF-NEXT: # %bb.5: # %entry -; RV64IF-NEXT: slti a1, a1, 0 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: beqz a1, .LBB18_7 -; RV64IF-NEXT: j .LBB18_8 -; RV64IF-NEXT: .LBB18_6: -; RV64IF-NEXT: sltu a1, a3, a0 -; RV64IF-NEXT: bnez a1, .LBB18_8 -; RV64IF-NEXT: .LBB18_7: # %entry +; RV64IF-NEXT: seqz a5, a4 +; RV64IF-NEXT: addi a5, a5, -1 +; RV64IF-NEXT: and a5, a5, a1 +; RV64IF-NEXT: bnez a4, .LBB18_5 +; RV64IF-NEXT: # %bb.4: # %entry ; RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: .LBB18_5: # %entry +; RV64IF-NEXT: 
slli a1, a2, 63 +; RV64IF-NEXT: beq a5, a2, .LBB18_7 +; RV64IF-NEXT: # %bb.6: # %entry +; RV64IF-NEXT: slti a2, a5, 0 +; RV64IF-NEXT: xori a2, a2, 1 +; RV64IF-NEXT: beqz a2, .LBB18_8 +; RV64IF-NEXT: j .LBB18_9 +; RV64IF-NEXT: .LBB18_7: +; RV64IF-NEXT: sltu a2, a1, a0 +; RV64IF-NEXT: bnez a2, .LBB18_9 ; RV64IF-NEXT: .LBB18_8: # %entry +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB18_9: # %entry ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret @@ -1194,64 +1165,67 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: lw a3, 16(sp) +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw a2, 16(sp) ; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: addi a5, a4, -1 +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: addi a5, a3, -1 ; RV32IFD-NEXT: beq a1, a5, .LBB18_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a3, a2 +; RV32IFD-NEXT: or a7, a2, a0 ; RV32IFD-NEXT: bnez a7, .LBB18_3 ; RV32IFD-NEXT: j .LBB18_4 ; RV32IFD-NEXT: .LBB18_2: -; RV32IFD-NEXT: sltiu a6, a0, -1 -; RV32IFD-NEXT: or a7, a3, a2 +; RV32IFD-NEXT: sltiu a6, a4, -1 +; RV32IFD-NEXT: or a7, a2, a0 ; RV32IFD-NEXT: beqz a7, .LBB18_4 ; RV32IFD-NEXT: .LBB18_3: # %entry -; RV32IFD-NEXT: slti a6, a2, 0 +; RV32IFD-NEXT: slti a6, a0, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry -; RV32IFD-NEXT: snez a7, a6 -; RV32IFD-NEXT: addi a7, a7, -1 +; RV32IFD-NEXT: seqz t0, a6 +; RV32IFD-NEXT: addi a7, t0, -1 +; RV32IFD-NEXT: neg t0, t0 ; RV32IFD-NEXT: bnez a6, .LBB18_6 ; RV32IFD-NEXT: # %bb.5: # %entry -; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: li a3, 0 ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB18_6: # %entry -; RV32IFD-NEXT: or a0, a7, a0 -; RV32IFD-NEXT: beq a1, a4, .LBB18_8 +; RV32IFD-NEXT: or a4, t0, a4 +; RV32IFD-NEXT: and a5, a7, a0 +; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: beq a1, a3, .LBB18_8 ; RV32IFD-NEXT: # %bb.7: # %entry -; RV32IFD-NEXT: sltu a4, a4, a1 +; RV32IFD-NEXT: sltu a0, a3, a1 ; RV32IFD-NEXT: j .LBB18_9 ; RV32IFD-NEXT: .LBB18_8: -; RV32IFD-NEXT: snez a4, a0 +; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB18_9: # %entry -; RV32IFD-NEXT: and a3, a3, a2 -; RV32IFD-NEXT: li a5, -1 -; RV32IFD-NEXT: beq a3, a5, .LBB18_11 +; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: li a3, -1 +; RV32IFD-NEXT: beq a2, a3, .LBB18_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a2, a2, 0 -; RV32IFD-NEXT: xori a4, a2, 1 +; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB18_11: # %entry -; RV32IFD-NEXT: bnez a4, .LBB18_13 +; RV32IFD-NEXT: bnez a0, .LBB18_13 ; RV32IFD-NEXT: # %bb.12: # %entry -; RV32IFD-NEXT: li a0, 0 ; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB18_13: # %entry +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: and a0, a0, a4 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: stest_f64i64: ; RV64IFD: # %bb.0: # %entry -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB18_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB18_2: # %entry +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i128 @@ -1274,30 +1248,23 @@ ; RV32IF-NEXT: 
mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixunsdfti@plt -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a1, 16(sp) -; RV32IF-NEXT: beqz a0, .LBB19_2 -; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: j .LBB19_3 -; RV32IF-NEXT: .LBB19_2: -; RV32IF-NEXT: seqz a2, a1 -; RV32IF-NEXT: .LBB19_3: # %entry -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a1, a1, a0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: beqz a1, .LBB19_5 -; RV32IF-NEXT: # %bb.4: # %entry -; RV32IF-NEXT: mv a0, a2 -; RV32IF-NEXT: .LBB19_5: # %entry -; RV32IF-NEXT: bnez a0, .LBB19_7 -; RV32IF-NEXT: # %bb.6: # %entry -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: j .LBB19_8 -; RV32IF-NEXT: .LBB19_7: -; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: .LBB19_8: # %entry +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: lw a2, 12(sp) +; RV32IF-NEXT: lw a3, 8(sp) +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, a0, a4 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi a1, a0, -1 +; RV32IF-NEXT: and a0, a1, a3 +; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1309,10 +1276,9 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunsdfti@plt -; RV64-NEXT: beqz a1, .LBB19_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB19_2: # %entry +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1325,30 +1291,23 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixunsdfti@plt -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a1, 16(sp) -; RV32IFD-NEXT: beqz a0, .LBB19_2 -; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: j .LBB19_3 -; RV32IFD-NEXT: .LBB19_2: -; RV32IFD-NEXT: seqz a2, a1 -; RV32IFD-NEXT: .LBB19_3: # %entry -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a1, a1, a0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: beqz a1, .LBB19_5 -; RV32IFD-NEXT: # %bb.4: # %entry -; RV32IFD-NEXT: mv a0, a2 -; RV32IFD-NEXT: .LBB19_5: # %entry -; RV32IFD-NEXT: bnez a0, .LBB19_7 -; RV32IFD-NEXT: # %bb.6: # %entry -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: j .LBB19_8 -; RV32IFD-NEXT: .LBB19_7: -; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: .LBB19_8: # %entry +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a2, 12(sp) +; RV32IFD-NEXT: lw a3, 8(sp) +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: and a0, a0, a4 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi a1, a0, -1 +; RV32IFD-NEXT: and a0, a1, a3 +; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -1371,55 +1330,55 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a2, 20(sp) -; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: 
beqz a2, .LBB20_2 +; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: beqz a1, .LBB20_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a0, a2, 0 +; RV32IF-NEXT: slti a2, a1, 0 ; RV32IF-NEXT: j .LBB20_3 ; RV32IF-NEXT: .LBB20_2: -; RV32IF-NEXT: seqz a0, a3 +; RV32IF-NEXT: seqz a2, a0 ; RV32IF-NEXT: .LBB20_3: # %entry -; RV32IF-NEXT: xori a1, a3, 1 -; RV32IF-NEXT: or a4, a1, a2 -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a4, .LBB20_5 +; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: xori a3, a0, 1 +; RV32IF-NEXT: or a3, a3, a1 +; RV32IF-NEXT: seqz a3, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a2, a3, a2 +; RV32IF-NEXT: seqz a3, a2 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: bnez a2, .LBB20_5 ; RV32IF-NEXT: # %bb.4: # %entry -; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry -; RV32IF-NEXT: bnez a1, .LBB20_9 +; RV32IF-NEXT: lw a5, 8(sp) +; RV32IF-NEXT: and a2, a3, a1 +; RV32IF-NEXT: and a1, a3, a4 +; RV32IF-NEXT: beqz a2, .LBB20_8 ; RV32IF-NEXT: # %bb.6: # %entry -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: li a3, 1 -; RV32IF-NEXT: bnez a2, .LBB20_10 +; RV32IF-NEXT: sgtz a4, a2 +; RV32IF-NEXT: and a3, a3, a5 +; RV32IF-NEXT: bnez a1, .LBB20_9 ; RV32IF-NEXT: .LBB20_7: -; RV32IF-NEXT: snez a4, a3 -; RV32IF-NEXT: bnez a1, .LBB20_11 +; RV32IF-NEXT: snez a5, a3 +; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: bnez a0, .LBB20_10 +; RV32IF-NEXT: j .LBB20_11 ; RV32IF-NEXT: .LBB20_8: -; RV32IF-NEXT: snez a5, a0 -; RV32IF-NEXT: or a2, a3, a2 -; RV32IF-NEXT: bnez a2, .LBB20_12 -; RV32IF-NEXT: j .LBB20_13 -; RV32IF-NEXT: .LBB20_9: -; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: beqz a2, .LBB20_7 -; RV32IF-NEXT: .LBB20_10: # %entry -; RV32IF-NEXT: sgtz a4, a2 -; RV32IF-NEXT: beqz a1, .LBB20_8 -; RV32IF-NEXT: .LBB20_11: # %entry +; RV32IF-NEXT: snez a4, a0 +; RV32IF-NEXT: and a3, a3, a5 +; RV32IF-NEXT: beqz a1, .LBB20_7 +; RV32IF-NEXT: .LBB20_9: # %entry ; RV32IF-NEXT: snez a5, a1 -; RV32IF-NEXT: or a2, a3, a2 -; RV32IF-NEXT: beqz a2, .LBB20_13 -; RV32IF-NEXT: .LBB20_12: # %entry +; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: beqz a0, .LBB20_11 +; RV32IF-NEXT: .LBB20_10: # %entry ; RV32IF-NEXT: mv a5, a4 -; RV32IF-NEXT: .LBB20_13: # %entry -; RV32IF-NEXT: bnez a5, .LBB20_15 -; RV32IF-NEXT: # %bb.14: # %entry -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: .LBB20_15: # %entry +; RV32IF-NEXT: .LBB20_11: # %entry +; RV32IF-NEXT: seqz a0, a5 +; RV32IF-NEXT: addi a2, a0, -1 +; RV32IF-NEXT: and a0, a2, a3 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1431,22 +1390,24 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixdfti@plt +; RV64-NEXT: mv a2, a1 ; RV64-NEXT: blez a1, .LBB20_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: li a1, 1 +; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB20_2: # %entry -; RV64-NEXT: beqz a1, .LBB20_4 -; RV64-NEXT: # %bb.3: # %entry ; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: beqz a1, .LBB20_5 -; RV64-NEXT: j .LBB20_6 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB20_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: j .LBB20_5 ; RV64-NEXT: .LBB20_4: ; RV64-NEXT: snez a1, a0 -; RV64-NEXT: bnez a1, .LBB20_6 ; RV64-NEXT: .LBB20_5: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB20_6: # %entry +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi 
a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1459,55 +1420,55 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a2, .LBB20_2 +; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: beqz a1, .LBB20_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a0, a2, 0 +; RV32IFD-NEXT: slti a2, a1, 0 ; RV32IFD-NEXT: j .LBB20_3 ; RV32IFD-NEXT: .LBB20_2: -; RV32IFD-NEXT: seqz a0, a3 +; RV32IFD-NEXT: seqz a2, a0 ; RV32IFD-NEXT: .LBB20_3: # %entry -; RV32IFD-NEXT: xori a1, a3, 1 -; RV32IFD-NEXT: or a4, a1, a2 -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: beqz a4, .LBB20_5 +; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: xori a3, a0, 1 +; RV32IFD-NEXT: or a3, a3, a1 +; RV32IFD-NEXT: seqz a3, a3 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: and a2, a3, a2 +; RV32IFD-NEXT: seqz a3, a2 +; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: bnez a2, .LBB20_5 ; RV32IFD-NEXT: # %bb.4: # %entry -; RV32IFD-NEXT: mv a1, a0 +; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry -; RV32IFD-NEXT: bnez a1, .LBB20_9 +; RV32IFD-NEXT: lw a5, 8(sp) +; RV32IFD-NEXT: and a2, a3, a1 +; RV32IFD-NEXT: and a1, a3, a4 +; RV32IFD-NEXT: beqz a2, .LBB20_8 ; RV32IFD-NEXT: # %bb.6: # %entry -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: li a3, 1 -; RV32IFD-NEXT: bnez a2, .LBB20_10 +; RV32IFD-NEXT: sgtz a4, a2 +; RV32IFD-NEXT: and a3, a3, a5 +; RV32IFD-NEXT: bnez a1, .LBB20_9 ; RV32IFD-NEXT: .LBB20_7: -; RV32IFD-NEXT: snez a4, a3 -; RV32IFD-NEXT: bnez a1, .LBB20_11 +; RV32IFD-NEXT: snez a5, a3 +; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: bnez a0, .LBB20_10 +; RV32IFD-NEXT: j .LBB20_11 ; RV32IFD-NEXT: .LBB20_8: -; RV32IFD-NEXT: snez a5, a0 -; RV32IFD-NEXT: or a2, a3, a2 -; RV32IFD-NEXT: bnez a2, .LBB20_12 -; RV32IFD-NEXT: j .LBB20_13 -; RV32IFD-NEXT: .LBB20_9: -; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: beqz a2, .LBB20_7 -; RV32IFD-NEXT: .LBB20_10: # %entry -; RV32IFD-NEXT: sgtz a4, a2 -; RV32IFD-NEXT: beqz a1, .LBB20_8 -; RV32IFD-NEXT: .LBB20_11: # %entry +; RV32IFD-NEXT: snez a4, a0 +; RV32IFD-NEXT: and a3, a3, a5 +; RV32IFD-NEXT: beqz a1, .LBB20_7 +; RV32IFD-NEXT: .LBB20_9: # %entry ; RV32IFD-NEXT: snez a5, a1 -; RV32IFD-NEXT: or a2, a3, a2 -; RV32IFD-NEXT: beqz a2, .LBB20_13 -; RV32IFD-NEXT: .LBB20_12: # %entry +; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: beqz a0, .LBB20_11 +; RV32IFD-NEXT: .LBB20_10: # %entry ; RV32IFD-NEXT: mv a5, a4 -; RV32IFD-NEXT: .LBB20_13: # %entry -; RV32IFD-NEXT: bnez a5, .LBB20_15 -; RV32IFD-NEXT: # %bb.14: # %entry -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: .LBB20_15: # %entry +; RV32IFD-NEXT: .LBB20_11: # %entry +; RV32IFD-NEXT: seqz a0, a5 +; RV32IFD-NEXT: addi a2, a0, -1 +; RV32IFD-NEXT: and a0, a2, a3 +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -1530,64 +1491,67 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a2, 16(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a5, a4, -1 +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lui a3, 524288 +; RV32-NEXT: addi a5, a3, -1 ; 
RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a3, a2 +; RV32-NEXT: or a7, a2, a0 ; RV32-NEXT: bnez a7, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: -; RV32-NEXT: sltiu a6, a0, -1 -; RV32-NEXT: or a7, a3, a2 +; RV32-NEXT: sltiu a6, a4, -1 +; RV32-NEXT: or a7, a2, a0 ; RV32-NEXT: beqz a7, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB21_4: # %entry -; RV32-NEXT: snez a7, a6 -; RV32-NEXT: addi a7, a7, -1 +; RV32-NEXT: seqz t0, a6 +; RV32-NEXT: addi a7, t0, -1 +; RV32-NEXT: neg t0, t0 ; RV32-NEXT: bnez a6, .LBB21_6 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: or a0, a7, a0 -; RV32-NEXT: beq a1, a4, .LBB21_8 +; RV32-NEXT: or a4, t0, a4 +; RV32-NEXT: and a5, a7, a0 +; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: beq a1, a3, .LBB21_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: sltu a0, a3, a1 ; RV32-NEXT: j .LBB21_9 ; RV32-NEXT: .LBB21_8: -; RV32-NEXT: snez a4, a0 +; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB21_9: # %entry -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: li a5, -1 -; RV32-NEXT: beq a3, a5, .LBB21_11 +; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: li a3, -1 +; RV32-NEXT: beq a2, a3, .LBB21_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: xori a4, a2, 1 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB21_11: # %entry -; RV32-NEXT: bnez a4, .LBB21_13 +; RV32-NEXT: bnez a0, .LBB21_13 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: li a0, 0 ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB21_13: # %entry +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: stest_f32i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: feq.s a0, fa0, fa0 -; RV64-NEXT: beqz a0, .LBB21_2 -; RV64-NEXT: # %bb.1: ; RV64-NEXT: fcvt.l.s a0, fa0, rtz -; RV64-NEXT: .LBB21_2: # %entry +; RV64-NEXT: feq.s a1, fa0, fa0 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i128 @@ -1608,30 +1572,23 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: beqz a0, .LBB22_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: j .LBB22_3 -; RV32-NEXT: .LBB22_2: -; RV32-NEXT: seqz a2, a1 -; RV32-NEXT: .LBB22_3: # %entry -; RV32-NEXT: xori a1, a1, 1 -; RV32-NEXT: or a1, a1, a0 -; RV32-NEXT: li a0, 0 -; RV32-NEXT: beqz a1, .LBB22_5 -; RV32-NEXT: # %bb.4: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB22_5: # %entry -; RV32-NEXT: bnez a0, .LBB22_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: j .LBB22_8 -; RV32-NEXT: .LBB22_7: -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: .LBB22_8: # %entry +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a2, 12(sp) +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 +; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: and a0, a1, a3 
+; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1643,10 +1600,9 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: beqz a1, .LBB22_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB22_2: # %entry +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1667,55 +1623,55 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB23_2 +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beqz a1, .LBB23_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a2, a1, 0 ; RV32-NEXT: j .LBB23_3 ; RV32-NEXT: .LBB23_2: -; RV32-NEXT: seqz a0, a3 +; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB23_3: # %entry -; RV32-NEXT: xori a1, a3, 1 -; RV32-NEXT: or a4, a1, a2 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a4, .LBB23_5 +; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: xori a3, a0, 1 +; RV32-NEXT: or a3, a3, a1 +; RV32-NEXT: seqz a3, a3 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: seqz a3, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: bnez a2, .LBB23_5 ; RV32-NEXT: # %bb.4: # %entry -; RV32-NEXT: mv a1, a0 +; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry -; RV32-NEXT: bnez a1, .LBB23_9 +; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: and a2, a3, a1 +; RV32-NEXT: and a1, a3, a4 +; RV32-NEXT: beqz a2, .LBB23_8 ; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: bnez a2, .LBB23_10 +; RV32-NEXT: sgtz a4, a2 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: bnez a1, .LBB23_9 ; RV32-NEXT: .LBB23_7: -; RV32-NEXT: snez a4, a3 -; RV32-NEXT: bnez a1, .LBB23_11 +; RV32-NEXT: snez a5, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: bnez a0, .LBB23_10 +; RV32-NEXT: j .LBB23_11 ; RV32-NEXT: .LBB23_8: -; RV32-NEXT: snez a5, a0 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB23_12 -; RV32-NEXT: j .LBB23_13 -; RV32-NEXT: .LBB23_9: -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: beqz a2, .LBB23_7 -; RV32-NEXT: .LBB23_10: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: beqz a1, .LBB23_8 -; RV32-NEXT: .LBB23_11: # %entry +; RV32-NEXT: snez a4, a0 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: beqz a1, .LBB23_7 +; RV32-NEXT: .LBB23_9: # %entry ; RV32-NEXT: snez a5, a1 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: beqz a2, .LBB23_13 -; RV32-NEXT: .LBB23_12: # %entry +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: beqz a0, .LBB23_11 +; RV32-NEXT: .LBB23_10: # %entry ; RV32-NEXT: mv a5, a4 -; RV32-NEXT: .LBB23_13: # %entry -; RV32-NEXT: bnez a5, .LBB23_15 -; RV32-NEXT: # %bb.14: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB23_15: # %entry +; RV32-NEXT: .LBB23_11: # %entry +; RV32-NEXT: seqz a0, a5 +; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1727,22 +1683,24 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixsfti@plt +; RV64-NEXT: mv a2, a1 ; RV64-NEXT: blez a1, .LBB23_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: li a1, 1 +; 
RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB23_2: # %entry -; RV64-NEXT: beqz a1, .LBB23_4 -; RV64-NEXT: # %bb.3: # %entry ; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: beqz a1, .LBB23_5 -; RV64-NEXT: j .LBB23_6 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB23_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: j .LBB23_5 ; RV64-NEXT: .LBB23_4: ; RV64-NEXT: snez a1, a0 -; RV64-NEXT: bnez a1, .LBB23_6 ; RV64-NEXT: .LBB23_5: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB23_6: # %entry +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1767,53 +1725,56 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a2, 16(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a5, a4, -1 +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lui a3, 524288 +; RV32-NEXT: addi a5, a3, -1 ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a3, a2 +; RV32-NEXT: or a7, a2, a0 ; RV32-NEXT: bnez a7, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: -; RV32-NEXT: sltiu a6, a0, -1 -; RV32-NEXT: or a7, a3, a2 +; RV32-NEXT: sltiu a6, a4, -1 +; RV32-NEXT: or a7, a2, a0 ; RV32-NEXT: beqz a7, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB24_4: # %entry -; RV32-NEXT: snez a7, a6 -; RV32-NEXT: addi a7, a7, -1 +; RV32-NEXT: seqz t0, a6 +; RV32-NEXT: addi a7, t0, -1 +; RV32-NEXT: neg t0, t0 ; RV32-NEXT: bnez a6, .LBB24_6 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: or a0, a7, a0 -; RV32-NEXT: beq a1, a4, .LBB24_8 +; RV32-NEXT: or a4, t0, a4 +; RV32-NEXT: and a5, a7, a0 +; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: beq a1, a3, .LBB24_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: sltu a0, a3, a1 ; RV32-NEXT: j .LBB24_9 ; RV32-NEXT: .LBB24_8: -; RV32-NEXT: snez a4, a0 +; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB24_9: # %entry -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: li a5, -1 -; RV32-NEXT: beq a3, a5, .LBB24_11 +; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: li a3, -1 +; RV32-NEXT: beq a2, a3, .LBB24_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: xori a4, a2, 1 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB24_11: # %entry -; RV32-NEXT: bnez a4, .LBB24_13 +; RV32-NEXT: bnez a0, .LBB24_13 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: li a0, 0 ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB24_13: # %entry +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1832,28 +1793,30 @@ ; RV64-NEXT: beqz a1, .LBB24_2 ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: slti a4, a1, 0 -; RV64-NEXT: beqz a4, .LBB24_3 -; RV64-NEXT: j .LBB24_4 +; RV64-NEXT: j .LBB24_3 ; RV64-NEXT: .LBB24_2: ; RV64-NEXT: sltu a4, a0, a3 -; RV64-NEXT: bnez a4, .LBB24_4 ; RV64-NEXT: .LBB24_3: # %entry -; RV64-NEXT: li a1, 0 -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB24_4: # %entry -; RV64-NEXT: slli a3, a2, 63 -; RV64-NEXT: beq a1, a2, .LBB24_6 -; RV64-NEXT: # 
%bb.5: # %entry -; RV64-NEXT: slti a1, a1, 0 -; RV64-NEXT: xori a1, a1, 1 -; RV64-NEXT: beqz a1, .LBB24_7 -; RV64-NEXT: j .LBB24_8 -; RV64-NEXT: .LBB24_6: -; RV64-NEXT: sltu a1, a3, a0 -; RV64-NEXT: bnez a1, .LBB24_8 -; RV64-NEXT: .LBB24_7: # %entry +; RV64-NEXT: seqz a5, a4 +; RV64-NEXT: addi a5, a5, -1 +; RV64-NEXT: and a5, a5, a1 +; RV64-NEXT: bnez a4, .LBB24_5 +; RV64-NEXT: # %bb.4: # %entry ; RV64-NEXT: mv a0, a3 +; RV64-NEXT: .LBB24_5: # %entry +; RV64-NEXT: slli a1, a2, 63 +; RV64-NEXT: beq a5, a2, .LBB24_7 +; RV64-NEXT: # %bb.6: # %entry +; RV64-NEXT: slti a2, a5, 0 +; RV64-NEXT: xori a2, a2, 1 +; RV64-NEXT: beqz a2, .LBB24_8 +; RV64-NEXT: j .LBB24_9 +; RV64-NEXT: .LBB24_7: +; RV64-NEXT: sltu a2, a1, a0 +; RV64-NEXT: bnez a2, .LBB24_9 ; RV64-NEXT: .LBB24_8: # %entry +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB24_9: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1878,30 +1841,23 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: beqz a0, .LBB25_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: j .LBB25_3 -; RV32-NEXT: .LBB25_2: -; RV32-NEXT: seqz a2, a1 -; RV32-NEXT: .LBB25_3: # %entry -; RV32-NEXT: xori a1, a1, 1 -; RV32-NEXT: or a1, a1, a0 -; RV32-NEXT: li a0, 0 -; RV32-NEXT: beqz a1, .LBB25_5 -; RV32-NEXT: # %bb.4: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB25_5: # %entry -; RV32-NEXT: bnez a0, .LBB25_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: j .LBB25_8 -; RV32-NEXT: .LBB25_7: -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: .LBB25_8: # %entry +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a2, 12(sp) +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 +; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: and a0, a1, a3 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1915,10 +1871,9 @@ ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: beqz a1, .LBB25_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB25_2: # %entry +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1941,55 +1896,55 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB26_2 +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beqz a1, .LBB26_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a2, a1, 0 ; RV32-NEXT: j .LBB26_3 ; RV32-NEXT: .LBB26_2: -; RV32-NEXT: seqz a0, a3 +; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB26_3: # %entry -; RV32-NEXT: xori a1, a3, 1 -; RV32-NEXT: or a4, a1, a2 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a4, .LBB26_5 +; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: xori a3, a0, 1 +; RV32-NEXT: or a3, a3, a1 +; RV32-NEXT: seqz a3, a3 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: seqz a3, 
a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: bnez a2, .LBB26_5 ; RV32-NEXT: # %bb.4: # %entry -; RV32-NEXT: mv a1, a0 +; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry -; RV32-NEXT: bnez a1, .LBB26_9 +; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: and a2, a3, a1 +; RV32-NEXT: and a1, a3, a4 +; RV32-NEXT: beqz a2, .LBB26_8 ; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: bnez a2, .LBB26_10 +; RV32-NEXT: sgtz a4, a2 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: bnez a1, .LBB26_9 ; RV32-NEXT: .LBB26_7: -; RV32-NEXT: snez a4, a3 -; RV32-NEXT: bnez a1, .LBB26_11 +; RV32-NEXT: snez a5, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: bnez a0, .LBB26_10 +; RV32-NEXT: j .LBB26_11 ; RV32-NEXT: .LBB26_8: -; RV32-NEXT: snez a5, a0 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB26_12 -; RV32-NEXT: j .LBB26_13 -; RV32-NEXT: .LBB26_9: -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: beqz a2, .LBB26_7 -; RV32-NEXT: .LBB26_10: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: beqz a1, .LBB26_8 -; RV32-NEXT: .LBB26_11: # %entry +; RV32-NEXT: snez a4, a0 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: beqz a1, .LBB26_7 +; RV32-NEXT: .LBB26_9: # %entry ; RV32-NEXT: snez a5, a1 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: beqz a2, .LBB26_13 -; RV32-NEXT: .LBB26_12: # %entry +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: beqz a0, .LBB26_11 +; RV32-NEXT: .LBB26_10: # %entry ; RV32-NEXT: mv a5, a4 -; RV32-NEXT: .LBB26_13: # %entry -; RV32-NEXT: bnez a5, .LBB26_15 -; RV32-NEXT: # %bb.14: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB26_15: # %entry +; RV32-NEXT: .LBB26_11: # %entry +; RV32-NEXT: seqz a0, a5 +; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -2003,22 +1958,24 @@ ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt +; RV64-NEXT: mv a2, a1 ; RV64-NEXT: blez a1, .LBB26_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: li a1, 1 +; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB26_2: # %entry -; RV64-NEXT: beqz a1, .LBB26_4 -; RV64-NEXT: # %bb.3: # %entry ; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: beqz a1, .LBB26_5 -; RV64-NEXT: j .LBB26_6 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB26_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: j .LBB26_5 ; RV64-NEXT: .LBB26_4: ; RV64-NEXT: snez a1, a0 -; RV64-NEXT: bnez a1, .LBB26_6 ; RV64-NEXT: .LBB26_5: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB26_6: # %entry +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2052,14 +2009,17 @@ ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: bgeu a0, a4, .LBB27_10 ; RV32IF-NEXT: .LBB27_2: # %entry -; RV32IF-NEXT: bnez a1, .LBB27_11 +; RV32IF-NEXT: beqz a1, .LBB27_4 ; RV32IF-NEXT: .LBB27_3: # %entry -; RV32IF-NEXT: bgez a1, .LBB27_12 +; RV32IF-NEXT: mv a0, a3 ; RV32IF-NEXT: .LBB27_4: # %entry +; RV32IF-NEXT: slti a3, a1, 0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bltz a1, .LBB27_13 -; RV32IF-NEXT: .LBB27_5: # %entry -; RV32IF-NEXT: bgeu a2, a0, .LBB27_14 +; RV32IF-NEXT: bltz a1, .LBB27_11 +; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: bgeu a2, a0, .LBB27_12 ; RV32IF-NEXT: .LBB27_6: # 
%entry ; RV32IF-NEXT: li a2, -1 ; RV32IF-NEXT: beq a1, a2, .LBB27_8 @@ -2074,18 +2034,12 @@ ; RV32IF-NEXT: bltu a0, a4, .LBB27_2 ; RV32IF-NEXT: .LBB27_10: # %entry ; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: beqz a1, .LBB27_3 +; RV32IF-NEXT: bnez a1, .LBB27_3 +; RV32IF-NEXT: j .LBB27_4 ; RV32IF-NEXT: .LBB27_11: # %entry -; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bltz a1, .LBB27_4 -; RV32IF-NEXT: .LBB27_12: # %entry -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bgez a1, .LBB27_5 -; RV32IF-NEXT: .LBB27_13: # %entry ; RV32IF-NEXT: lui a3, 524288 ; RV32IF-NEXT: bltu a2, a0, .LBB27_6 -; RV32IF-NEXT: .LBB27_14: # %entry +; RV32IF-NEXT: .LBB27_12: # %entry ; RV32IF-NEXT: lui a0, 524288 ; RV32IF-NEXT: li a2, -1 ; RV32IF-NEXT: bne a1, a2, .LBB27_7 @@ -2114,11 +2068,11 @@ ; ; RV32IFD-LABEL: stest_f64i32_mm: ; RV32IFD: # %bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB27_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB27_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: stest_f64i32_mm: @@ -2179,11 +2133,11 @@ ; ; RV32IFD-LABEL: utest_f64i32_mm: ; RV32IFD: # %bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB28_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB28_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: utest_f64i32_mm: @@ -2211,33 +2165,23 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixdfdi@plt -; RV32IF-NEXT: bnez a1, .LBB29_6 +; RV32IF-NEXT: slti a2, a1, 0 +; RV32IF-NEXT: beqz a1, .LBB29_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: bgez a1, .LBB29_7 +; RV32IF-NEXT: addi a3, a2, -1 +; RV32IF-NEXT: or a0, a3, a0 ; RV32IF-NEXT: .LBB29_2: # %entry -; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: blez a1, .LBB29_8 -; RV32IF-NEXT: .LBB29_3: # %entry -; RV32IF-NEXT: beqz a1, .LBB29_5 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: beqz a1, .LBB29_4 +; RV32IF-NEXT: # %bb.3: # %entry +; RV32IF-NEXT: sgtz a1, a1 +; RV32IF-NEXT: neg a1, a1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: .LBB29_4: # %entry -; RV32IF-NEXT: mv a0, a2 -; RV32IF-NEXT: .LBB29_5: # %entry ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB29_6: # %entry -; RV32IF-NEXT: slti a2, a1, 0 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: bltz a1, .LBB29_2 -; RV32IF-NEXT: .LBB29_7: # %entry -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: bgtz a1, .LBB29_3 -; RV32IF-NEXT: .LBB29_8: # %entry -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: bnez a1, .LBB29_4 -; RV32IF-NEXT: j .LBB29_5 ; ; RV64IF-LABEL: ustest_f64i32_mm: ; RV64IF: # %bb.0: # %entry @@ -2252,21 +2196,20 @@ ; RV64IF-NEXT: # %bb.1: # %entry ; RV64IF-NEXT: mv a0, a1 ; RV64IF-NEXT: .LBB29_2: # %entry -; RV64IF-NEXT: bgtz a0, .LBB29_4 -; RV64IF-NEXT: # %bb.3: # %entry -; RV64IF-NEXT: li a0, 0 -; RV64IF-NEXT: .LBB29_4: # %entry +; RV64IF-NEXT: sgtz a1, a0 +; RV64IF-NEXT: neg a1, a1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; ; RV32IFD-LABEL: ustest_f64i32_mm: ; RV32IFD: # 
%bb.0: # %entry -; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: beqz a0, .LBB29_2 -; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz -; RV32IFD-NEXT: .LBB29_2: # %entry +; RV32IFD-NEXT: feq.d a1, fa0, fa0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: ustest_f64i32_mm: @@ -2274,16 +2217,13 @@ ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz ; RV64IFD-NEXT: li a1, -1 ; RV64IFD-NEXT: srli a1, a1, 32 -; RV64IFD-NEXT: bge a0, a1, .LBB29_3 +; RV64IFD-NEXT: blt a0, a1, .LBB29_2 ; RV64IFD-NEXT: # %bb.1: # %entry -; RV64IFD-NEXT: blez a0, .LBB29_4 -; RV64IFD-NEXT: .LBB29_2: # %entry -; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB29_3: # %entry ; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: bgtz a0, .LBB29_2 -; RV64IFD-NEXT: .LBB29_4: # %entry -; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: .LBB29_2: # %entry +; RV64IFD-NEXT: sgtz a1, a0 +; RV64IFD-NEXT: neg a1, a1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i64 @@ -2296,11 +2236,11 @@ define i32 @stest_f32i32_mm(float %x) { ; RV32-LABEL: stest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB30_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-NEXT: .LBB30_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: stest_f32i32_mm: @@ -2330,11 +2270,11 @@ define i32 @utest_f32i32_mm(float %x) { ; RV32-LABEL: utest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB31_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32-NEXT: .LBB31_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: utest_f32i32_mm: @@ -2357,11 +2297,11 @@ define i32 @ustest_f32i32_mm(float %x) { ; RV32-LABEL: ustest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: feq.s a0, fa0, fa0 -; RV32-NEXT: beqz a0, .LBB32_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: fcvt.wu.s a0, fa0, rtz -; RV32-NEXT: .LBB32_2: # %entry +; RV32-NEXT: feq.s a1, fa0, fa0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: ustest_f32i32_mm: @@ -2369,16 +2309,13 @@ ; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: li a1, -1 ; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: bge a0, a1, .LBB32_3 +; RV64-NEXT: blt a0, a1, .LBB32_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: blez a0, .LBB32_4 -; RV64-NEXT: .LBB32_2: # %entry -; RV64-NEXT: ret -; RV64-NEXT: .LBB32_3: # %entry ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgtz a0, .LBB32_2 -; RV64-NEXT: .LBB32_4: # %entry -; RV64-NEXT: li a0, 0 +; RV64-NEXT: .LBB32_2: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i64 @@ -2405,14 +2342,17 @@ ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: bgeu a0, a4, .LBB33_10 ; RV32-NEXT: .LBB33_2: # %entry -; RV32-NEXT: bnez a1, .LBB33_11 +; RV32-NEXT: beqz a1, .LBB33_4 ; RV32-NEXT: .LBB33_3: # %entry -; RV32-NEXT: bgez a1, .LBB33_12 +; RV32-NEXT: mv a0, a3 ; RV32-NEXT: .LBB33_4: # %entry +; RV32-NEXT: slti a3, a1, 0 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltz a1, .LBB33_13 -; RV32-NEXT: .LBB33_5: # %entry -; RV32-NEXT: bgeu a2, a0, .LBB33_14 +; RV32-NEXT: bltz a1, .LBB33_11 +; RV32-NEXT: # 
%bb.5: # %entry +; RV32-NEXT: bgeu a2, a0, .LBB33_12 ; RV32-NEXT: .LBB33_6: # %entry ; RV32-NEXT: li a2, -1 ; RV32-NEXT: beq a1, a2, .LBB33_8 @@ -2427,18 +2367,12 @@ ; RV32-NEXT: bltu a0, a4, .LBB33_2 ; RV32-NEXT: .LBB33_10: # %entry ; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB33_3 +; RV32-NEXT: bnez a1, .LBB33_3 +; RV32-NEXT: j .LBB33_4 ; RV32-NEXT: .LBB33_11: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: bltz a1, .LBB33_4 -; RV32-NEXT: .LBB33_12: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB33_5 -; RV32-NEXT: .LBB33_13: # %entry ; RV32-NEXT: lui a3, 524288 ; RV32-NEXT: bltu a2, a0, .LBB33_6 -; RV32-NEXT: .LBB33_14: # %entry +; RV32-NEXT: .LBB33_12: # %entry ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: bne a1, a2, .LBB33_7 @@ -2526,34 +2460,23 @@ ; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bgez a1, .LBB35_6 +; RV32-NEXT: slti a2, a1, 0 +; RV32-NEXT: beqz a1, .LBB35_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bnez a1, .LBB35_7 +; RV32-NEXT: addi a3, a2, -1 +; RV32-NEXT: or a0, a3, a0 ; RV32-NEXT: .LBB35_2: # %entry -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: blez a2, .LBB35_8 -; RV32-NEXT: .LBB35_3: # %entry -; RV32-NEXT: beqz a2, .LBB35_5 -; RV32-NEXT: .LBB35_4: # %entry -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB35_5: # %entry +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: beqz a1, .LBB35_4 +; RV32-NEXT: # %bb.3: # %entry +; RV32-NEXT: sgtz a1, a1 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: .LBB35_4: # %entry ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret -; RV32-NEXT: .LBB35_6: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: beqz a1, .LBB35_2 -; RV32-NEXT: .LBB35_7: # %entry -; RV32-NEXT: slti a1, a1, 0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bgtz a2, .LBB35_3 -; RV32-NEXT: .LBB35_8: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bnez a2, .LBB35_4 -; RV32-NEXT: j .LBB35_5 ; ; RV64-LABEL: ustest_f16i32_mm: ; RV64: # %bb.0: # %entry @@ -2570,10 +2493,9 @@ ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB35_2: # %entry -; RV64-NEXT: bgtz a0, .LBB35_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB35_4: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2754,10 +2676,9 @@ ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: mv a0, a1 ; RV32IF-NEXT: .LBB38_2: # %entry -; RV32IF-NEXT: bgtz a0, .LBB38_4 -; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: .LBB38_4: # %entry +; RV32IF-NEXT: sgtz a1, a0 +; RV32IF-NEXT: neg a1, a1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret @@ -2775,10 +2696,9 @@ ; RV64IF-NEXT: # %bb.1: # %entry ; RV64IF-NEXT: mv a0, a1 ; RV64IF-NEXT: .LBB38_2: # %entry -; RV64IF-NEXT: bgtz a0, .LBB38_4 -; RV64IF-NEXT: # %bb.3: # %entry -; RV64IF-NEXT: li a0, 0 -; RV64IF-NEXT: .LBB38_4: # %entry +; RV64IF-NEXT: sgtz a1, a0 +; RV64IF-NEXT: neg a1, a1 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret @@ -2788,16 +2708,13 @@ ; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz ; RV32IFD-NEXT: lui a1, 16 ; RV32IFD-NEXT: addi a1, a1, -1 -; 
RV32IFD-NEXT: bge a0, a1, .LBB38_3 +; RV32IFD-NEXT: blt a0, a1, .LBB38_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: blez a0, .LBB38_4 -; RV32IFD-NEXT: .LBB38_2: # %entry -; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB38_3: # %entry ; RV32IFD-NEXT: mv a0, a1 -; RV32IFD-NEXT: bgtz a0, .LBB38_2 -; RV32IFD-NEXT: .LBB38_4: # %entry -; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: .LBB38_2: # %entry +; RV32IFD-NEXT: sgtz a1, a0 +; RV32IFD-NEXT: neg a1, a1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: ustest_f64i16_mm: @@ -2805,16 +2722,13 @@ ; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz ; RV64IFD-NEXT: lui a1, 16 ; RV64IFD-NEXT: addiw a1, a1, -1 -; RV64IFD-NEXT: bge a0, a1, .LBB38_3 +; RV64IFD-NEXT: blt a0, a1, .LBB38_2 ; RV64IFD-NEXT: # %bb.1: # %entry -; RV64IFD-NEXT: blez a0, .LBB38_4 -; RV64IFD-NEXT: .LBB38_2: # %entry -; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB38_3: # %entry ; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: bgtz a0, .LBB38_2 -; RV64IFD-NEXT: .LBB38_4: # %entry -; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: .LBB38_2: # %entry +; RV64IFD-NEXT: sgtz a1, a0 +; RV64IFD-NEXT: neg a1, a1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i32 @@ -2905,16 +2819,13 @@ ; RV32-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: bge a0, a1, .LBB41_3 +; RV32-NEXT: blt a0, a1, .LBB41_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: blez a0, .LBB41_4 -; RV32-NEXT: .LBB41_2: # %entry -; RV32-NEXT: ret -; RV32-NEXT: .LBB41_3: # %entry ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bgtz a0, .LBB41_2 -; RV32-NEXT: .LBB41_4: # %entry -; RV32-NEXT: li a0, 0 +; RV32-NEXT: .LBB41_2: # %entry +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: ustest_f32i16_mm: @@ -2922,16 +2833,13 @@ ; RV64-NEXT: fcvt.w.s a0, fa0, rtz ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: bge a0, a1, .LBB41_3 +; RV64-NEXT: blt a0, a1, .LBB41_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: blez a0, .LBB41_4 -; RV64-NEXT: .LBB41_2: # %entry -; RV64-NEXT: ret -; RV64-NEXT: .LBB41_3: # %entry ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgtz a0, .LBB41_2 -; RV64-NEXT: .LBB41_4: # %entry -; RV64-NEXT: li a0, 0 +; RV64-NEXT: .LBB41_2: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i32 @@ -3059,10 +2967,9 @@ ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB44_2: # %entry -; RV32-NEXT: bgtz a0, .LBB44_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB44_4: # %entry +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -3082,10 +2989,9 @@ ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB44_2: # %entry -; RV64-NEXT: bgtz a0, .LBB44_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: .LBB44_4: # %entry +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3110,107 +3016,87 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a2, 20(sp) -; RV32IF-NEXT: lw a7, 8(sp) -; RV32IF-NEXT: lw a5, 12(sp) -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: addi a1, a4, -1 -; RV32IF-NEXT: mv a3, a7 -; 
RV32IF-NEXT: bne a5, a1, .LBB45_17 +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw t0, 8(sp) +; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: addi a6, a3, -1 +; RV32IF-NEXT: mv a2, t0 +; RV32IF-NEXT: beq a4, a6, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: or a6, a0, a2 -; RV32IF-NEXT: bnez a6, .LBB45_18 +; RV32IF-NEXT: sltu a2, a4, a6 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: or a2, a2, t0 ; RV32IF-NEXT: .LBB45_2: # %entry -; RV32IF-NEXT: mv a7, a5 -; RV32IF-NEXT: bgez a2, .LBB45_19 -; RV32IF-NEXT: .LBB45_3: # %entry -; RV32IF-NEXT: bgeu a5, a1, .LBB45_20 +; RV32IF-NEXT: or a7, a1, a0 +; RV32IF-NEXT: slti a5, a0, 0 +; RV32IF-NEXT: bnez a7, .LBB45_16 +; RV32IF-NEXT: # %bb.3: # %entry +; RV32IF-NEXT: mv t0, a4 +; RV32IF-NEXT: bgez a0, .LBB45_17 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: bnez a6, .LBB45_21 +; RV32IF-NEXT: bgeu a4, a6, .LBB45_18 ; RV32IF-NEXT: .LBB45_5: # %entry -; RV32IF-NEXT: li a6, 0 -; RV32IF-NEXT: bnez a2, .LBB45_22 +; RV32IF-NEXT: beqz a7, .LBB45_7 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: bgez a2, .LBB45_23 +; RV32IF-NEXT: mv a4, t0 ; RV32IF-NEXT: .LBB45_7: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: bltz a2, .LBB45_24 -; RV32IF-NEXT: .LBB45_8: # %entry -; RV32IF-NEXT: mv a1, a5 -; RV32IF-NEXT: bltu a4, a5, .LBB45_10 +; RV32IF-NEXT: srai a6, a0, 31 +; RV32IF-NEXT: and a1, a6, a1 +; RV32IF-NEXT: seqz a6, a0 +; RV32IF-NEXT: neg a5, a5 +; RV32IF-NEXT: and a5, a5, a0 +; RV32IF-NEXT: addi a6, a6, -1 +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: bgez a5, .LBB45_9 +; RV32IF-NEXT: # %bb.8: # %entry +; RV32IF-NEXT: lui a0, 524288 ; RV32IF-NEXT: .LBB45_9: # %entry +; RV32IF-NEXT: and a6, a6, a1 +; RV32IF-NEXT: mv a1, a4 +; RV32IF-NEXT: bltu a3, a4, .LBB45_11 +; RV32IF-NEXT: # %bb.10: # %entry ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB45_10: # %entry -; RV32IF-NEXT: and a6, a6, a2 +; RV32IF-NEXT: .LBB45_11: # %entry +; RV32IF-NEXT: and a6, a6, a5 ; RV32IF-NEXT: li a7, -1 -; RV32IF-NEXT: bne a6, a7, .LBB45_25 -; RV32IF-NEXT: # %bb.11: # %entry -; RV32IF-NEXT: mv t0, a3 -; RV32IF-NEXT: bgeu a4, a5, .LBB45_26 -; RV32IF-NEXT: .LBB45_12: # %entry -; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bne a5, a4, .LBB45_27 +; RV32IF-NEXT: bne a6, a7, .LBB45_19 +; RV32IF-NEXT: # %bb.12: # %entry +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: bne a4, a3, .LBB45_20 ; RV32IF-NEXT: .LBB45_13: # %entry -; RV32IF-NEXT: bltz a2, .LBB45_28 +; RV32IF-NEXT: beq a6, a7, .LBB45_15 ; RV32IF-NEXT: .LBB45_14: # %entry -; RV32IF-NEXT: beq a6, a7, .LBB45_16 +; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, a0, a2 ; RV32IF-NEXT: .LBB45_15: # %entry -; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: .LBB45_16: # %entry ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB45_16: # %entry +; RV32IF-NEXT: addi a2, a5, -1 +; RV32IF-NEXT: or a2, a2, t0 +; RV32IF-NEXT: mv t0, a4 +; RV32IF-NEXT: bltz a0, .LBB45_4 ; RV32IF-NEXT: .LBB45_17: # %entry -; RV32IF-NEXT: sltu a3, a5, a1 -; RV32IF-NEXT: addi a3, a3, -1 -; RV32IF-NEXT: or a3, a3, a7 -; RV32IF-NEXT: or a6, a0, a2 -; RV32IF-NEXT: beqz a6, .LBB45_2 +; RV32IF-NEXT: mv t0, a6 +; RV32IF-NEXT: bltu a4, a6, .LBB45_5 ; RV32IF-NEXT: .LBB45_18: # %entry -; RV32IF-NEXT: slti a3, a2, 0 -; RV32IF-NEXT: addi a3, a3, -1 -; RV32IF-NEXT: or a3, a3, a7 -; RV32IF-NEXT: mv a7, a5 -; RV32IF-NEXT: bltz a2, .LBB45_3 +; RV32IF-NEXT: mv a4, a6 +; RV32IF-NEXT: bnez a7, .LBB45_6 +; RV32IF-NEXT: j 
.LBB45_7 ; RV32IF-NEXT: .LBB45_19: # %entry -; RV32IF-NEXT: mv a7, a1 -; RV32IF-NEXT: bltu a5, a1, .LBB45_4 -; RV32IF-NEXT: .LBB45_20: # %entry -; RV32IF-NEXT: mv a5, a1 -; RV32IF-NEXT: beqz a6, .LBB45_5 -; RV32IF-NEXT: .LBB45_21: # %entry -; RV32IF-NEXT: mv a5, a7 -; RV32IF-NEXT: li a6, 0 -; RV32IF-NEXT: beqz a2, .LBB45_6 -; RV32IF-NEXT: .LBB45_22: # %entry -; RV32IF-NEXT: srai a1, a2, 31 -; RV32IF-NEXT: and a6, a1, a0 -; RV32IF-NEXT: bltz a2, .LBB45_7 -; RV32IF-NEXT: .LBB45_23: # %entry -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: bgez a2, .LBB45_8 -; RV32IF-NEXT: .LBB45_24: # %entry -; RV32IF-NEXT: lui a0, 524288 -; RV32IF-NEXT: mv a1, a5 -; RV32IF-NEXT: bgeu a4, a5, .LBB45_9 -; RV32IF-NEXT: j .LBB45_10 -; RV32IF-NEXT: .LBB45_25: # %entry ; RV32IF-NEXT: mv a1, a0 -; RV32IF-NEXT: mv t0, a3 -; RV32IF-NEXT: bltu a4, a5, .LBB45_12 -; RV32IF-NEXT: .LBB45_26: # %entry -; RV32IF-NEXT: li t0, 0 -; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: beq a5, a4, .LBB45_13 -; RV32IF-NEXT: .LBB45_27: # %entry -; RV32IF-NEXT: mv a0, t0 -; RV32IF-NEXT: bgez a2, .LBB45_14 -; RV32IF-NEXT: .LBB45_28: # %entry -; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: bne a6, a7, .LBB45_15 -; RV32IF-NEXT: j .LBB45_16 +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: beq a4, a3, .LBB45_13 +; RV32IF-NEXT: .LBB45_20: # %entry +; RV32IF-NEXT: sltu a0, a3, a4 +; RV32IF-NEXT: neg a0, a0 +; RV32IF-NEXT: and a0, a0, a2 +; RV32IF-NEXT: bne a6, a7, .LBB45_14 +; RV32IF-NEXT: j .LBB45_15 ; ; RV64IF-LABEL: stest_f64i64_mm: ; RV64IF: # %bb.0: # %entry @@ -3222,46 +3108,44 @@ ; RV64IF-NEXT: li a2, -1 ; RV64IF-NEXT: srli a4, a2, 1 ; RV64IF-NEXT: mv a3, a0 -; RV64IF-NEXT: bgez a1, .LBB45_10 +; RV64IF-NEXT: bgez a1, .LBB45_9 ; RV64IF-NEXT: # %bb.1: # %entry -; RV64IF-NEXT: bgeu a0, a4, .LBB45_11 +; RV64IF-NEXT: bgeu a0, a4, .LBB45_10 ; RV64IF-NEXT: .LBB45_2: # %entry -; RV64IF-NEXT: bnez a1, .LBB45_12 +; RV64IF-NEXT: beqz a1, .LBB45_4 ; RV64IF-NEXT: .LBB45_3: # %entry -; RV64IF-NEXT: bltz a1, .LBB45_5 +; RV64IF-NEXT: mv a0, a3 ; RV64IF-NEXT: .LBB45_4: # %entry -; RV64IF-NEXT: li a1, 0 -; RV64IF-NEXT: .LBB45_5: # %entry +; RV64IF-NEXT: slti a3, a1, 0 +; RV64IF-NEXT: neg a3, a3 +; RV64IF-NEXT: and a1, a3, a1 ; RV64IF-NEXT: slli a4, a2, 63 ; RV64IF-NEXT: mv a3, a0 -; RV64IF-NEXT: bltz a1, .LBB45_13 -; RV64IF-NEXT: # %bb.6: # %entry -; RV64IF-NEXT: bgeu a4, a0, .LBB45_14 +; RV64IF-NEXT: bltz a1, .LBB45_11 +; RV64IF-NEXT: # %bb.5: # %entry +; RV64IF-NEXT: bgeu a4, a0, .LBB45_12 +; RV64IF-NEXT: .LBB45_6: # %entry +; RV64IF-NEXT: beq a1, a2, .LBB45_8 ; RV64IF-NEXT: .LBB45_7: # %entry -; RV64IF-NEXT: beq a1, a2, .LBB45_9 -; RV64IF-NEXT: .LBB45_8: # %entry ; RV64IF-NEXT: mv a0, a3 -; RV64IF-NEXT: .LBB45_9: # %entry +; RV64IF-NEXT: .LBB45_8: # %entry ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret -; RV64IF-NEXT: .LBB45_10: # %entry +; RV64IF-NEXT: .LBB45_9: # %entry ; RV64IF-NEXT: mv a3, a4 ; RV64IF-NEXT: bltu a0, a4, .LBB45_2 -; RV64IF-NEXT: .LBB45_11: # %entry +; RV64IF-NEXT: .LBB45_10: # %entry ; RV64IF-NEXT: mv a0, a4 -; RV64IF-NEXT: beqz a1, .LBB45_3 -; RV64IF-NEXT: .LBB45_12: # %entry -; RV64IF-NEXT: mv a0, a3 -; RV64IF-NEXT: bgez a1, .LBB45_4 -; RV64IF-NEXT: j .LBB45_5 -; RV64IF-NEXT: .LBB45_13: # %entry +; RV64IF-NEXT: bnez a1, .LBB45_3 +; RV64IF-NEXT: j .LBB45_4 +; RV64IF-NEXT: .LBB45_11: # %entry ; RV64IF-NEXT: mv a3, a4 -; RV64IF-NEXT: bltu a4, a0, .LBB45_7 -; RV64IF-NEXT: .LBB45_14: # %entry +; RV64IF-NEXT: bltu a4, a0, .LBB45_6 +; RV64IF-NEXT: .LBB45_12: # %entry ; RV64IF-NEXT: mv 
a0, a4 -; RV64IF-NEXT: bne a1, a2, .LBB45_8 -; RV64IF-NEXT: j .LBB45_9 +; RV64IF-NEXT: bne a1, a2, .LBB45_7 +; RV64IF-NEXT: j .LBB45_8 ; ; RV32IFD-LABEL: stest_f64i64_mm: ; RV32IFD: # %bb.0: # %entry @@ -3271,115 +3155,95 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: lw a7, 8(sp) -; RV32IFD-NEXT: lw a5, 12(sp) -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: addi a1, a4, -1 -; RV32IFD-NEXT: mv a3, a7 -; RV32IFD-NEXT: bne a5, a1, .LBB45_17 +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw t0, 8(sp) +; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: addi a6, a3, -1 +; RV32IFD-NEXT: mv a2, t0 +; RV32IFD-NEXT: beq a4, a6, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: or a6, a0, a2 -; RV32IFD-NEXT: bnez a6, .LBB45_18 +; RV32IFD-NEXT: sltu a2, a4, a6 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: or a2, a2, t0 ; RV32IFD-NEXT: .LBB45_2: # %entry -; RV32IFD-NEXT: mv a7, a5 -; RV32IFD-NEXT: bgez a2, .LBB45_19 -; RV32IFD-NEXT: .LBB45_3: # %entry -; RV32IFD-NEXT: bgeu a5, a1, .LBB45_20 +; RV32IFD-NEXT: or a7, a1, a0 +; RV32IFD-NEXT: slti a5, a0, 0 +; RV32IFD-NEXT: bnez a7, .LBB45_16 +; RV32IFD-NEXT: # %bb.3: # %entry +; RV32IFD-NEXT: mv t0, a4 +; RV32IFD-NEXT: bgez a0, .LBB45_17 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: bnez a6, .LBB45_21 +; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18 ; RV32IFD-NEXT: .LBB45_5: # %entry -; RV32IFD-NEXT: li a6, 0 -; RV32IFD-NEXT: bnez a2, .LBB45_22 +; RV32IFD-NEXT: beqz a7, .LBB45_7 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: bgez a2, .LBB45_23 +; RV32IFD-NEXT: mv a4, t0 ; RV32IFD-NEXT: .LBB45_7: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: bltz a2, .LBB45_24 -; RV32IFD-NEXT: .LBB45_8: # %entry -; RV32IFD-NEXT: mv a1, a5 -; RV32IFD-NEXT: bltu a4, a5, .LBB45_10 +; RV32IFD-NEXT: srai a6, a0, 31 +; RV32IFD-NEXT: and a1, a6, a1 +; RV32IFD-NEXT: seqz a6, a0 +; RV32IFD-NEXT: neg a5, a5 +; RV32IFD-NEXT: and a5, a5, a0 +; RV32IFD-NEXT: addi a6, a6, -1 +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: bgez a5, .LBB45_9 +; RV32IFD-NEXT: # %bb.8: # %entry +; RV32IFD-NEXT: lui a0, 524288 ; RV32IFD-NEXT: .LBB45_9: # %entry +; RV32IFD-NEXT: and a6, a6, a1 +; RV32IFD-NEXT: mv a1, a4 +; RV32IFD-NEXT: bltu a3, a4, .LBB45_11 +; RV32IFD-NEXT: # %bb.10: # %entry ; RV32IFD-NEXT: lui a1, 524288 -; RV32IFD-NEXT: .LBB45_10: # %entry -; RV32IFD-NEXT: and a6, a6, a2 +; RV32IFD-NEXT: .LBB45_11: # %entry +; RV32IFD-NEXT: and a6, a6, a5 ; RV32IFD-NEXT: li a7, -1 -; RV32IFD-NEXT: bne a6, a7, .LBB45_25 -; RV32IFD-NEXT: # %bb.11: # %entry -; RV32IFD-NEXT: mv t0, a3 -; RV32IFD-NEXT: bgeu a4, a5, .LBB45_26 -; RV32IFD-NEXT: .LBB45_12: # %entry -; RV32IFD-NEXT: mv a0, a3 -; RV32IFD-NEXT: bne a5, a4, .LBB45_27 +; RV32IFD-NEXT: bne a6, a7, .LBB45_19 +; RV32IFD-NEXT: # %bb.12: # %entry +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: bne a4, a3, .LBB45_20 ; RV32IFD-NEXT: .LBB45_13: # %entry -; RV32IFD-NEXT: bltz a2, .LBB45_28 +; RV32IFD-NEXT: beq a6, a7, .LBB45_15 ; RV32IFD-NEXT: .LBB45_14: # %entry -; RV32IFD-NEXT: beq a6, a7, .LBB45_16 +; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: and a0, a0, a2 ; RV32IFD-NEXT: .LBB45_15: # %entry -; RV32IFD-NEXT: mv a0, a3 -; RV32IFD-NEXT: .LBB45_16: # %entry ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB45_16: # %entry +; RV32IFD-NEXT: 
addi a2, a5, -1 +; RV32IFD-NEXT: or a2, a2, t0 +; RV32IFD-NEXT: mv t0, a4 +; RV32IFD-NEXT: bltz a0, .LBB45_4 ; RV32IFD-NEXT: .LBB45_17: # %entry -; RV32IFD-NEXT: sltu a3, a5, a1 -; RV32IFD-NEXT: addi a3, a3, -1 -; RV32IFD-NEXT: or a3, a3, a7 -; RV32IFD-NEXT: or a6, a0, a2 -; RV32IFD-NEXT: beqz a6, .LBB45_2 +; RV32IFD-NEXT: mv t0, a6 +; RV32IFD-NEXT: bltu a4, a6, .LBB45_5 ; RV32IFD-NEXT: .LBB45_18: # %entry -; RV32IFD-NEXT: slti a3, a2, 0 -; RV32IFD-NEXT: addi a3, a3, -1 -; RV32IFD-NEXT: or a3, a3, a7 -; RV32IFD-NEXT: mv a7, a5 -; RV32IFD-NEXT: bltz a2, .LBB45_3 +; RV32IFD-NEXT: mv a4, a6 +; RV32IFD-NEXT: bnez a7, .LBB45_6 +; RV32IFD-NEXT: j .LBB45_7 ; RV32IFD-NEXT: .LBB45_19: # %entry -; RV32IFD-NEXT: mv a7, a1 -; RV32IFD-NEXT: bltu a5, a1, .LBB45_4 -; RV32IFD-NEXT: .LBB45_20: # %entry -; RV32IFD-NEXT: mv a5, a1 -; RV32IFD-NEXT: beqz a6, .LBB45_5 -; RV32IFD-NEXT: .LBB45_21: # %entry -; RV32IFD-NEXT: mv a5, a7 -; RV32IFD-NEXT: li a6, 0 -; RV32IFD-NEXT: beqz a2, .LBB45_6 -; RV32IFD-NEXT: .LBB45_22: # %entry -; RV32IFD-NEXT: srai a1, a2, 31 -; RV32IFD-NEXT: and a6, a1, a0 -; RV32IFD-NEXT: bltz a2, .LBB45_7 -; RV32IFD-NEXT: .LBB45_23: # %entry -; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: bgez a2, .LBB45_8 -; RV32IFD-NEXT: .LBB45_24: # %entry -; RV32IFD-NEXT: lui a0, 524288 -; RV32IFD-NEXT: mv a1, a5 -; RV32IFD-NEXT: bgeu a4, a5, .LBB45_9 -; RV32IFD-NEXT: j .LBB45_10 -; RV32IFD-NEXT: .LBB45_25: # %entry ; RV32IFD-NEXT: mv a1, a0 -; RV32IFD-NEXT: mv t0, a3 -; RV32IFD-NEXT: bltu a4, a5, .LBB45_12 -; RV32IFD-NEXT: .LBB45_26: # %entry -; RV32IFD-NEXT: li t0, 0 -; RV32IFD-NEXT: mv a0, a3 -; RV32IFD-NEXT: beq a5, a4, .LBB45_13 -; RV32IFD-NEXT: .LBB45_27: # %entry -; RV32IFD-NEXT: mv a0, t0 -; RV32IFD-NEXT: bgez a2, .LBB45_14 -; RV32IFD-NEXT: .LBB45_28: # %entry -; RV32IFD-NEXT: li a3, 0 -; RV32IFD-NEXT: bne a6, a7, .LBB45_15 -; RV32IFD-NEXT: j .LBB45_16 +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: beq a4, a3, .LBB45_13 +; RV32IFD-NEXT: .LBB45_20: # %entry +; RV32IFD-NEXT: sltu a0, a3, a4 +; RV32IFD-NEXT: neg a0, a0 +; RV32IFD-NEXT: and a0, a0, a2 +; RV32IFD-NEXT: bne a6, a7, .LBB45_14 +; RV32IFD-NEXT: j .LBB45_15 ; ; RV64IFD-LABEL: stest_f64i64_mm: ; RV64IFD: # %bb.0: # %entry -; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: beqz a0, .LBB45_2 -; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz -; RV64IFD-NEXT: .LBB45_2: # %entry +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i128 @@ -3400,40 +3264,24 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixunsdfti@plt -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a0, .LBB46_3 -; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: beqz a2, .LBB46_4 -; RV32IF-NEXT: .LBB46_2: -; RV32IF-NEXT: lw a4, 8(sp) -; RV32IF-NEXT: j .LBB46_5 -; RV32IF-NEXT: .LBB46_3: -; RV32IF-NEXT: seqz a2, a3 -; RV32IF-NEXT: bnez a2, .LBB46_2 -; RV32IF-NEXT: .LBB46_4: # %entry -; RV32IF-NEXT: mv a4, a1 -; RV32IF-NEXT: .LBB46_5: # %entry -; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a0 -; RV32IF-NEXT: mv a0, a1 -; RV32IF-NEXT: beqz a3, .LBB46_7 -; RV32IF-NEXT: # %bb.6: # %entry -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: .LBB46_7: # %entry -; RV32IF-NEXT: bnez a2, .LBB46_9 -; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: bnez a3, .LBB46_10 -; RV32IF-NEXT: j 
.LBB46_11 -; RV32IF-NEXT: .LBB46_9: +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) -; RV32IF-NEXT: beqz a3, .LBB46_11 -; RV32IF-NEXT: .LBB46_10: # %entry -; RV32IF-NEXT: mv a1, a2 -; RV32IF-NEXT: .LBB46_11: # %entry +; RV32IF-NEXT: lw a3, 8(sp) +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 +; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a3, a4, a3 +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: addi a1, a0, -1 +; RV32IF-NEXT: and a0, a1, a3 +; RV32IF-NEXT: and a2, a4, a2 +; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -3445,17 +3293,13 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunsdfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beqz a1, .LBB46_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: .LBB46_2: # %entry -; RV64-NEXT: li a3, 1 -; RV64-NEXT: beq a1, a3, .LBB46_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: .LBB46_4: # %entry +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3468,40 +3312,24 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixunsdfti@plt -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: beqz a0, .LBB46_3 -; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: beqz a2, .LBB46_4 -; RV32IFD-NEXT: .LBB46_2: -; RV32IFD-NEXT: lw a4, 8(sp) -; RV32IFD-NEXT: j .LBB46_5 -; RV32IFD-NEXT: .LBB46_3: -; RV32IFD-NEXT: seqz a2, a3 -; RV32IFD-NEXT: bnez a2, .LBB46_2 -; RV32IFD-NEXT: .LBB46_4: # %entry -; RV32IFD-NEXT: mv a4, a1 -; RV32IFD-NEXT: .LBB46_5: # %entry -; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a0 -; RV32IFD-NEXT: mv a0, a1 -; RV32IFD-NEXT: beqz a3, .LBB46_7 -; RV32IFD-NEXT: # %bb.6: # %entry -; RV32IFD-NEXT: mv a0, a4 -; RV32IFD-NEXT: .LBB46_7: # %entry -; RV32IFD-NEXT: bnez a2, .LBB46_9 -; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: bnez a3, .LBB46_10 -; RV32IFD-NEXT: j .LBB46_11 -; RV32IFD-NEXT: .LBB46_9: +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) -; RV32IFD-NEXT: beqz a3, .LBB46_11 -; RV32IFD-NEXT: .LBB46_10: # %entry -; RV32IFD-NEXT: mv a1, a2 -; RV32IFD-NEXT: .LBB46_11: # %entry +; RV32IFD-NEXT: lw a3, 8(sp) +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 +; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a3, a4, a3 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: addi a1, a0, -1 +; RV32IFD-NEXT: and a0, a1, a3 +; RV32IFD-NEXT: and a2, a4, a2 +; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3523,109 +3351,68 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a2, 20(sp) -; 
RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a2, .LBB47_3 +; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: li a3, 1 +; RV32IF-NEXT: mv a6, a1 +; RV32IF-NEXT: bltz a0, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a0, a2, 0 -; RV32IF-NEXT: beqz a0, .LBB47_4 -; RV32IF-NEXT: .LBB47_2: -; RV32IF-NEXT: lw a5, 12(sp) -; RV32IF-NEXT: j .LBB47_5 -; RV32IF-NEXT: .LBB47_3: -; RV32IF-NEXT: seqz a0, a3 -; RV32IF-NEXT: bnez a0, .LBB47_2 +; RV32IF-NEXT: li a6, 1 +; RV32IF-NEXT: .LBB47_2: # %entry +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: bltu a1, a3, .LBB47_4 +; RV32IF-NEXT: # %bb.3: # %entry +; RV32IF-NEXT: li a2, 1 ; RV32IF-NEXT: .LBB47_4: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: .LBB47_5: # %entry -; RV32IF-NEXT: xori a1, a3, 1 -; RV32IF-NEXT: or a4, a1, a2 -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a4, .LBB47_7 -; RV32IF-NEXT: # %bb.6: # %entry -; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: lw a5, 12(sp) +; RV32IF-NEXT: lw a3, 8(sp) +; RV32IF-NEXT: slti a4, a0, 0 +; RV32IF-NEXT: beqz a0, .LBB47_6 +; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: mv a2, a6 +; RV32IF-NEXT: mv a6, a4 +; RV32IF-NEXT: j .LBB47_7 +; RV32IF-NEXT: .LBB47_6: +; RV32IF-NEXT: seqz a6, a1 ; RV32IF-NEXT: .LBB47_7: # %entry -; RV32IF-NEXT: bnez a0, .LBB47_9 +; RV32IF-NEXT: seqz a6, a6 +; RV32IF-NEXT: addi a6, a6, -1 +; RV32IF-NEXT: and a3, a6, a3 +; RV32IF-NEXT: xori a1, a1, 1 +; RV32IF-NEXT: or a1, a1, a0 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a3, a1, a3 +; RV32IF-NEXT: and a5, a6, a5 +; RV32IF-NEXT: and a1, a1, a5 +; RV32IF-NEXT: neg a4, a4 +; RV32IF-NEXT: and a4, a4, a0 +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: beqz a1, .LBB47_9 ; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: li a0, 0 -; RV32IF-NEXT: bnez a4, .LBB47_10 -; RV32IF-NEXT: j .LBB47_11 -; RV32IF-NEXT: .LBB47_9: -; RV32IF-NEXT: lw a5, 8(sp) -; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: seqz a0, a1 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, a0, a3 +; RV32IF-NEXT: .LBB47_9: # %entry ; RV32IF-NEXT: beqz a4, .LBB47_11 -; RV32IF-NEXT: .LBB47_10: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: .LBB47_11: # %entry -; RV32IF-NEXT: li a5, 1 -; RV32IF-NEXT: mv a4, a3 -; RV32IF-NEXT: bgez a2, .LBB47_17 -; RV32IF-NEXT: # %bb.12: # %entry -; RV32IF-NEXT: bgeu a3, a5, .LBB47_18 +; RV32IF-NEXT: # %bb.10: # %entry +; RV32IF-NEXT: sgtz a5, a4 +; RV32IF-NEXT: or a2, a2, a4 +; RV32IF-NEXT: bnez a2, .LBB47_12 +; RV32IF-NEXT: j .LBB47_13 +; RV32IF-NEXT: .LBB47_11: +; RV32IF-NEXT: snez a5, a2 +; RV32IF-NEXT: or a2, a2, a4 +; RV32IF-NEXT: beqz a2, .LBB47_13 +; RV32IF-NEXT: .LBB47_12: # %entry +; RV32IF-NEXT: seqz a0, a5 +; RV32IF-NEXT: addi a2, a0, -1 +; RV32IF-NEXT: and a0, a2, a3 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: .LBB47_13: # %entry -; RV32IF-NEXT: bnez a2, .LBB47_19 -; RV32IF-NEXT: .LBB47_14: # %entry -; RV32IF-NEXT: bgez a2, .LBB47_20 -; RV32IF-NEXT: .LBB47_15: # %entry -; RV32IF-NEXT: beqz a2, .LBB47_21 -; RV32IF-NEXT: .LBB47_16: # %entry -; RV32IF-NEXT: sgtz a4, a2 -; RV32IF-NEXT: mv a5, a0 -; RV32IF-NEXT: beqz a4, .LBB47_22 -; RV32IF-NEXT: j .LBB47_23 -; RV32IF-NEXT: .LBB47_17: # %entry -; RV32IF-NEXT: li a4, 1 -; RV32IF-NEXT: bltu a3, a5, .LBB47_13 -; RV32IF-NEXT: .LBB47_18: # %entry -; RV32IF-NEXT: li a3, 1 -; RV32IF-NEXT: beqz a2, .LBB47_14 -; RV32IF-NEXT: .LBB47_19: # %entry -; RV32IF-NEXT: mv a3, a4 -; RV32IF-NEXT: bltz a2, .LBB47_15 -; RV32IF-NEXT: .LBB47_20: # %entry -; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: bnez a2, .LBB47_16 -; 
RV32IF-NEXT: .LBB47_21: -; RV32IF-NEXT: snez a4, a3 -; RV32IF-NEXT: mv a5, a0 -; RV32IF-NEXT: bnez a4, .LBB47_23 -; RV32IF-NEXT: .LBB47_22: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: .LBB47_23: # %entry -; RV32IF-NEXT: mv a6, a0 -; RV32IF-NEXT: beqz a1, .LBB47_30 -; RV32IF-NEXT: # %bb.24: # %entry -; RV32IF-NEXT: bnez a1, .LBB47_31 -; RV32IF-NEXT: .LBB47_25: # %entry -; RV32IF-NEXT: or a2, a3, a2 -; RV32IF-NEXT: bnez a2, .LBB47_32 -; RV32IF-NEXT: .LBB47_26: # %entry -; RV32IF-NEXT: mv a3, a1 -; RV32IF-NEXT: beqz a4, .LBB47_33 -; RV32IF-NEXT: .LBB47_27: # %entry -; RV32IF-NEXT: beqz a2, .LBB47_29 -; RV32IF-NEXT: .LBB47_28: # %entry -; RV32IF-NEXT: mv a1, a3 -; RV32IF-NEXT: .LBB47_29: # %entry ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB47_30: # %entry -; RV32IF-NEXT: li a6, 0 -; RV32IF-NEXT: beqz a1, .LBB47_25 -; RV32IF-NEXT: .LBB47_31: # %entry -; RV32IF-NEXT: mv a0, a6 -; RV32IF-NEXT: or a2, a3, a2 -; RV32IF-NEXT: beqz a2, .LBB47_26 -; RV32IF-NEXT: .LBB47_32: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: mv a3, a1 -; RV32IF-NEXT: bnez a4, .LBB47_27 -; RV32IF-NEXT: .LBB47_33: # %entry -; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: bnez a2, .LBB47_28 -; RV32IF-NEXT: j .LBB47_29 ; ; RV64-LABEL: ustest_f64i64_mm: ; RV64: # %bb.0: # %entry @@ -3634,37 +3421,27 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixdfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a4, 1 -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: bgtz a1, .LBB47_6 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: blez a1, .LBB47_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: bne a1, a4, .LBB47_7 +; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB47_2: # %entry -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: blez a3, .LBB47_8 -; RV64-NEXT: .LBB47_3: # %entry -; RV64-NEXT: beqz a3, .LBB47_5 +; RV64-NEXT: sgtz a3, a1 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB47_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: .LBB47_4: # %entry -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB47_5: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret -; RV64-NEXT: .LBB47_6: # %entry -; RV64-NEXT: li a2, 0 -; RV64-NEXT: li a3, 1 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beq a1, a4, .LBB47_2 -; RV64-NEXT: .LBB47_7: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bgtz a3, .LBB47_3 -; RV64-NEXT: .LBB47_8: # %entry -; RV64-NEXT: li a1, 0 -; RV64-NEXT: bnez a3, .LBB47_4 -; RV64-NEXT: j .LBB47_5 ; ; RV32IFD-LABEL: ustest_f64i64_mm: ; RV32IFD: # %bb.0: # %entry @@ -3674,109 +3451,68 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a2, .LBB47_3 +; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: li a3, 1 +; RV32IFD-NEXT: mv a6, a1 +; RV32IFD-NEXT: bltz a0, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a0, a2, 0 -; RV32IFD-NEXT: beqz a0, .LBB47_4 -; RV32IFD-NEXT: .LBB47_2: -; RV32IFD-NEXT: lw a5, 12(sp) -; RV32IFD-NEXT: j .LBB47_5 -; RV32IFD-NEXT: .LBB47_3: -; RV32IFD-NEXT: seqz a0, a3 -; RV32IFD-NEXT: bnez a0, .LBB47_2 +; RV32IFD-NEXT: li a6, 1 +; RV32IFD-NEXT: 
.LBB47_2: # %entry +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: bltu a1, a3, .LBB47_4 +; RV32IFD-NEXT: # %bb.3: # %entry +; RV32IFD-NEXT: li a2, 1 ; RV32IFD-NEXT: .LBB47_4: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: .LBB47_5: # %entry -; RV32IFD-NEXT: xori a1, a3, 1 -; RV32IFD-NEXT: or a4, a1, a2 -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: beqz a4, .LBB47_7 -; RV32IFD-NEXT: # %bb.6: # %entry -; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: lw a5, 12(sp) +; RV32IFD-NEXT: lw a3, 8(sp) +; RV32IFD-NEXT: slti a4, a0, 0 +; RV32IFD-NEXT: beqz a0, .LBB47_6 +; RV32IFD-NEXT: # %bb.5: # %entry +; RV32IFD-NEXT: mv a2, a6 +; RV32IFD-NEXT: mv a6, a4 +; RV32IFD-NEXT: j .LBB47_7 +; RV32IFD-NEXT: .LBB47_6: +; RV32IFD-NEXT: seqz a6, a1 ; RV32IFD-NEXT: .LBB47_7: # %entry -; RV32IFD-NEXT: bnez a0, .LBB47_9 +; RV32IFD-NEXT: seqz a6, a6 +; RV32IFD-NEXT: addi a6, a6, -1 +; RV32IFD-NEXT: and a3, a6, a3 +; RV32IFD-NEXT: xori a1, a1, 1 +; RV32IFD-NEXT: or a1, a1, a0 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a3, a1, a3 +; RV32IFD-NEXT: and a5, a6, a5 +; RV32IFD-NEXT: and a1, a1, a5 +; RV32IFD-NEXT: neg a4, a4 +; RV32IFD-NEXT: and a4, a4, a0 +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: beqz a1, .LBB47_9 ; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: li a0, 0 -; RV32IFD-NEXT: bnez a4, .LBB47_10 -; RV32IFD-NEXT: j .LBB47_11 -; RV32IFD-NEXT: .LBB47_9: -; RV32IFD-NEXT: lw a5, 8(sp) -; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: seqz a0, a1 +; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: and a0, a0, a3 +; RV32IFD-NEXT: .LBB47_9: # %entry ; RV32IFD-NEXT: beqz a4, .LBB47_11 -; RV32IFD-NEXT: .LBB47_10: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: .LBB47_11: # %entry -; RV32IFD-NEXT: li a5, 1 -; RV32IFD-NEXT: mv a4, a3 -; RV32IFD-NEXT: bgez a2, .LBB47_17 -; RV32IFD-NEXT: # %bb.12: # %entry -; RV32IFD-NEXT: bgeu a3, a5, .LBB47_18 +; RV32IFD-NEXT: # %bb.10: # %entry +; RV32IFD-NEXT: sgtz a5, a4 +; RV32IFD-NEXT: or a2, a2, a4 +; RV32IFD-NEXT: bnez a2, .LBB47_12 +; RV32IFD-NEXT: j .LBB47_13 +; RV32IFD-NEXT: .LBB47_11: +; RV32IFD-NEXT: snez a5, a2 +; RV32IFD-NEXT: or a2, a2, a4 +; RV32IFD-NEXT: beqz a2, .LBB47_13 +; RV32IFD-NEXT: .LBB47_12: # %entry +; RV32IFD-NEXT: seqz a0, a5 +; RV32IFD-NEXT: addi a2, a0, -1 +; RV32IFD-NEXT: and a0, a2, a3 +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: .LBB47_13: # %entry -; RV32IFD-NEXT: bnez a2, .LBB47_19 -; RV32IFD-NEXT: .LBB47_14: # %entry -; RV32IFD-NEXT: bgez a2, .LBB47_20 -; RV32IFD-NEXT: .LBB47_15: # %entry -; RV32IFD-NEXT: beqz a2, .LBB47_21 -; RV32IFD-NEXT: .LBB47_16: # %entry -; RV32IFD-NEXT: sgtz a4, a2 -; RV32IFD-NEXT: mv a5, a0 -; RV32IFD-NEXT: beqz a4, .LBB47_22 -; RV32IFD-NEXT: j .LBB47_23 -; RV32IFD-NEXT: .LBB47_17: # %entry -; RV32IFD-NEXT: li a4, 1 -; RV32IFD-NEXT: bltu a3, a5, .LBB47_13 -; RV32IFD-NEXT: .LBB47_18: # %entry -; RV32IFD-NEXT: li a3, 1 -; RV32IFD-NEXT: beqz a2, .LBB47_14 -; RV32IFD-NEXT: .LBB47_19: # %entry -; RV32IFD-NEXT: mv a3, a4 -; RV32IFD-NEXT: bltz a2, .LBB47_15 -; RV32IFD-NEXT: .LBB47_20: # %entry -; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: bnez a2, .LBB47_16 -; RV32IFD-NEXT: .LBB47_21: -; RV32IFD-NEXT: snez a4, a3 -; RV32IFD-NEXT: mv a5, a0 -; RV32IFD-NEXT: bnez a4, .LBB47_23 -; RV32IFD-NEXT: .LBB47_22: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: .LBB47_23: # %entry -; RV32IFD-NEXT: mv a6, a0 -; RV32IFD-NEXT: beqz a1, .LBB47_30 -; RV32IFD-NEXT: # %bb.24: # %entry -; RV32IFD-NEXT: bnez a1, .LBB47_31 -; RV32IFD-NEXT: .LBB47_25: # %entry -; RV32IFD-NEXT: or a2, a3, a2 -; 
RV32IFD-NEXT: bnez a2, .LBB47_32 -; RV32IFD-NEXT: .LBB47_26: # %entry -; RV32IFD-NEXT: mv a3, a1 -; RV32IFD-NEXT: beqz a4, .LBB47_33 -; RV32IFD-NEXT: .LBB47_27: # %entry -; RV32IFD-NEXT: beqz a2, .LBB47_29 -; RV32IFD-NEXT: .LBB47_28: # %entry -; RV32IFD-NEXT: mv a1, a3 -; RV32IFD-NEXT: .LBB47_29: # %entry ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB47_30: # %entry -; RV32IFD-NEXT: li a6, 0 -; RV32IFD-NEXT: beqz a1, .LBB47_25 -; RV32IFD-NEXT: .LBB47_31: # %entry -; RV32IFD-NEXT: mv a0, a6 -; RV32IFD-NEXT: or a2, a3, a2 -; RV32IFD-NEXT: beqz a2, .LBB47_26 -; RV32IFD-NEXT: .LBB47_32: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: mv a3, a1 -; RV32IFD-NEXT: bnez a4, .LBB47_27 -; RV32IFD-NEXT: .LBB47_33: # %entry -; RV32IFD-NEXT: li a3, 0 -; RV32IFD-NEXT: bnez a2, .LBB47_28 -; RV32IFD-NEXT: j .LBB47_29 entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -3794,115 +3530,95 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a7, 8(sp) -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a1, a4, -1 -; RV32-NEXT: mv a3, a7 -; RV32-NEXT: bne a5, a1, .LBB48_17 +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw t0, 8(sp) +; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lui a3, 524288 +; RV32-NEXT: addi a6, a3, -1 +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: beq a4, a6, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: or a6, a0, a2 -; RV32-NEXT: bnez a6, .LBB48_18 +; RV32-NEXT: sltu a2, a4, a6 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bgez a2, .LBB48_19 -; RV32-NEXT: .LBB48_3: # %entry -; RV32-NEXT: bgeu a5, a1, .LBB48_20 +; RV32-NEXT: or a7, a1, a0 +; RV32-NEXT: slti a5, a0, 0 +; RV32-NEXT: bnez a7, .LBB48_16 +; RV32-NEXT: # %bb.3: # %entry +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bgez a0, .LBB48_17 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: bnez a6, .LBB48_21 +; RV32-NEXT: bgeu a4, a6, .LBB48_18 ; RV32-NEXT: .LBB48_5: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: bnez a2, .LBB48_22 +; RV32-NEXT: beqz a7, .LBB48_7 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: bgez a2, .LBB48_23 +; RV32-NEXT: mv a4, t0 ; RV32-NEXT: .LBB48_7: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bltz a2, .LBB48_24 -; RV32-NEXT: .LBB48_8: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bltu a4, a5, .LBB48_10 +; RV32-NEXT: srai a6, a0, 31 +; RV32-NEXT: and a1, a6, a1 +; RV32-NEXT: seqz a6, a0 +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a5, a5, a0 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgez a5, .LBB48_9 +; RV32-NEXT: # %bb.8: # %entry +; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: .LBB48_9: # %entry +; RV32-NEXT: and a6, a6, a1 +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: bltu a3, a4, .LBB48_11 +; RV32-NEXT: # %bb.10: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB48_10: # %entry -; RV32-NEXT: and a6, a6, a2 +; RV32-NEXT: .LBB48_11: # %entry +; RV32-NEXT: and a6, a6, a5 ; RV32-NEXT: li a7, -1 -; RV32-NEXT: bne a6, a7, .LBB48_25 -; RV32-NEXT: # %bb.11: # %entry -; RV32-NEXT: mv t0, a3 -; RV32-NEXT: bgeu a4, a5, .LBB48_26 -; RV32-NEXT: .LBB48_12: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: bne a5, a4, .LBB48_27 +; RV32-NEXT: bne a6, a7, .LBB48_19 +; RV32-NEXT: # %bb.12: # %entry +; RV32-NEXT: mv a0, a2 +; 
RV32-NEXT: bne a4, a3, .LBB48_20 ; RV32-NEXT: .LBB48_13: # %entry -; RV32-NEXT: bltz a2, .LBB48_28 +; RV32-NEXT: beq a6, a7, .LBB48_15 ; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: beq a6, a7, .LBB48_16 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: .LBB48_15: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB48_16: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret +; RV32-NEXT: .LBB48_16: # %entry +; RV32-NEXT: addi a2, a5, -1 +; RV32-NEXT: or a2, a2, t0 +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bltz a0, .LBB48_4 ; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: sltu a3, a5, a1 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: or a3, a3, a7 -; RV32-NEXT: or a6, a0, a2 -; RV32-NEXT: beqz a6, .LBB48_2 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: bltu a4, a6, .LBB48_5 ; RV32-NEXT: .LBB48_18: # %entry -; RV32-NEXT: slti a3, a2, 0 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: or a3, a3, a7 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltz a2, .LBB48_3 +; RV32-NEXT: mv a4, a6 +; RV32-NEXT: bnez a7, .LBB48_6 +; RV32-NEXT: j .LBB48_7 ; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bltu a5, a1, .LBB48_4 -; RV32-NEXT: .LBB48_20: # %entry -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: beqz a6, .LBB48_5 -; RV32-NEXT: .LBB48_21: # %entry -; RV32-NEXT: mv a5, a7 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a2, .LBB48_6 -; RV32-NEXT: .LBB48_22: # %entry -; RV32-NEXT: srai a1, a2, 31 -; RV32-NEXT: and a6, a1, a0 -; RV32-NEXT: bltz a2, .LBB48_7 -; RV32-NEXT: .LBB48_23: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgez a2, .LBB48_8 -; RV32-NEXT: .LBB48_24: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bgeu a4, a5, .LBB48_9 -; RV32-NEXT: j .LBB48_10 -; RV32-NEXT: .LBB48_25: # %entry ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: mv t0, a3 -; RV32-NEXT: bltu a4, a5, .LBB48_12 -; RV32-NEXT: .LBB48_26: # %entry -; RV32-NEXT: li t0, 0 -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: beq a5, a4, .LBB48_13 -; RV32-NEXT: .LBB48_27: # %entry -; RV32-NEXT: mv a0, t0 -; RV32-NEXT: bgez a2, .LBB48_14 -; RV32-NEXT: .LBB48_28: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: bne a6, a7, .LBB48_15 -; RV32-NEXT: j .LBB48_16 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a4, a3, .LBB48_13 +; RV32-NEXT: .LBB48_20: # %entry +; RV32-NEXT: sltu a0, a3, a4 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: bne a6, a7, .LBB48_14 +; RV32-NEXT: j .LBB48_15 ; ; RV64-LABEL: stest_f32i64_mm: ; RV64: # %bb.0: # %entry -; RV64-NEXT: feq.s a0, fa0, fa0 -; RV64-NEXT: beqz a0, .LBB48_2 -; RV64-NEXT: # %bb.1: ; RV64-NEXT: fcvt.l.s a0, fa0, rtz -; RV64-NEXT: .LBB48_2: # %entry +; RV64-NEXT: feq.s a1, fa0, fa0 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret entry: %conv = fptosi float %x to i128 @@ -3921,40 +3637,24 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB49_3 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: beqz a2, .LBB49_4 -; RV32-NEXT: .LBB49_2: -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: j .LBB49_5 -; RV32-NEXT: .LBB49_3: -; RV32-NEXT: seqz a2, a3 -; RV32-NEXT: bnez a2, .LBB49_2 -; RV32-NEXT: .LBB49_4: # %entry -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB49_5: # %entry -; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beqz a3, 
.LBB49_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: .LBB49_7: # %entry -; RV32-NEXT: bnez a2, .LBB49_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bnez a3, .LBB49_10 -; RV32-NEXT: j .LBB49_11 -; RV32-NEXT: .LBB49_9: +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: beqz a3, .LBB49_11 -; RV32-NEXT: .LBB49_10: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB49_11: # %entry +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: and a0, a1, a3 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3966,17 +3666,13 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beqz a1, .LBB49_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: .LBB49_2: # %entry -; RV64-NEXT: li a3, 1 -; RV64-NEXT: beq a1, a3, .LBB49_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: .LBB49_4: # %entry +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3996,109 +3692,68 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB50_3 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: li a3, 1 +; RV32-NEXT: mv a6, a1 +; RV32-NEXT: bltz a0, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a0, a2, 0 -; RV32-NEXT: beqz a0, .LBB50_4 -; RV32-NEXT: .LBB50_2: -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: j .LBB50_5 -; RV32-NEXT: .LBB50_3: -; RV32-NEXT: seqz a0, a3 -; RV32-NEXT: bnez a0, .LBB50_2 +; RV32-NEXT: li a6, 1 +; RV32-NEXT: .LBB50_2: # %entry +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB50_4 +; RV32-NEXT: # %bb.3: # %entry +; RV32-NEXT: li a2, 1 ; RV32-NEXT: .LBB50_4: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB50_5: # %entry -; RV32-NEXT: xori a1, a3, 1 -; RV32-NEXT: or a4, a1, a2 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a4, .LBB50_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: mv a1, a5 +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: beqz a0, .LBB50_6 +; RV32-NEXT: # %bb.5: # %entry +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: mv a6, a4 +; RV32-NEXT: j .LBB50_7 +; RV32-NEXT: .LBB50_6: +; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB50_7: # %entry -; RV32-NEXT: bnez a0, .LBB50_9 +; RV32-NEXT: seqz a6, a6 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: and a3, a6, a3 +; RV32-NEXT: xori a1, a1, 1 +; RV32-NEXT: or a1, a1, a0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a3, a1, a3 +; RV32-NEXT: and a5, a6, a5 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a4, a4, a0 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beqz a1, .LBB50_9 ; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: li a5, 0 -; 
RV32-NEXT: li a0, 0 -; RV32-NEXT: bnez a4, .LBB50_10 -; RV32-NEXT: j .LBB50_11 -; RV32-NEXT: .LBB50_9: -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: li a0, 0 +; RV32-NEXT: seqz a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: .LBB50_9: # %entry ; RV32-NEXT: beqz a4, .LBB50_11 -; RV32-NEXT: .LBB50_10: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: .LBB50_11: # %entry -; RV32-NEXT: li a5, 1 -; RV32-NEXT: mv a4, a3 -; RV32-NEXT: bgez a2, .LBB50_17 -; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: bgeu a3, a5, .LBB50_18 +; RV32-NEXT: # %bb.10: # %entry +; RV32-NEXT: sgtz a5, a4 +; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: bnez a2, .LBB50_12 +; RV32-NEXT: j .LBB50_13 +; RV32-NEXT: .LBB50_11: +; RV32-NEXT: snez a5, a2 +; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: beqz a2, .LBB50_13 +; RV32-NEXT: .LBB50_12: # %entry +; RV32-NEXT: seqz a0, a5 +; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: .LBB50_13: # %entry -; RV32-NEXT: bnez a2, .LBB50_19 -; RV32-NEXT: .LBB50_14: # %entry -; RV32-NEXT: bgez a2, .LBB50_20 -; RV32-NEXT: .LBB50_15: # %entry -; RV32-NEXT: beqz a2, .LBB50_21 -; RV32-NEXT: .LBB50_16: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: beqz a4, .LBB50_22 -; RV32-NEXT: j .LBB50_23 -; RV32-NEXT: .LBB50_17: # %entry -; RV32-NEXT: li a4, 1 -; RV32-NEXT: bltu a3, a5, .LBB50_13 -; RV32-NEXT: .LBB50_18: # %entry -; RV32-NEXT: li a3, 1 -; RV32-NEXT: beqz a2, .LBB50_14 -; RV32-NEXT: .LBB50_19: # %entry -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: bltz a2, .LBB50_15 -; RV32-NEXT: .LBB50_20: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bnez a2, .LBB50_16 -; RV32-NEXT: .LBB50_21: -; RV32-NEXT: snez a4, a3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bnez a4, .LBB50_23 -; RV32-NEXT: .LBB50_22: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB50_23: # %entry -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: beqz a1, .LBB50_30 -; RV32-NEXT: # %bb.24: # %entry -; RV32-NEXT: bnez a1, .LBB50_31 -; RV32-NEXT: .LBB50_25: # %entry -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB50_32 -; RV32-NEXT: .LBB50_26: # %entry -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: beqz a4, .LBB50_33 -; RV32-NEXT: .LBB50_27: # %entry -; RV32-NEXT: beqz a2, .LBB50_29 -; RV32-NEXT: .LBB50_28: # %entry -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB50_29: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret -; RV32-NEXT: .LBB50_30: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a1, .LBB50_25 -; RV32-NEXT: .LBB50_31: # %entry -; RV32-NEXT: mv a0, a6 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: beqz a2, .LBB50_26 -; RV32-NEXT: .LBB50_32: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bnez a4, .LBB50_27 -; RV32-NEXT: .LBB50_33: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: bnez a2, .LBB50_28 -; RV32-NEXT: j .LBB50_29 ; ; RV64-LABEL: ustest_f32i64_mm: ; RV64: # %bb.0: # %entry @@ -4107,37 +3762,27 @@ ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a4, 1 -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: bgtz a1, .LBB50_6 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: blez a1, .LBB50_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: bne a1, a4, .LBB50_7 +; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB50_2: # %entry -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: blez a3, .LBB50_8 -; RV64-NEXT: .LBB50_3: # %entry -; RV64-NEXT: beqz a3, .LBB50_5 +; RV64-NEXT: sgtz a3, a1 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: 
and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB50_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: .LBB50_4: # %entry -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB50_5: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret -; RV64-NEXT: .LBB50_6: # %entry -; RV64-NEXT: li a2, 0 -; RV64-NEXT: li a3, 1 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beq a1, a4, .LBB50_2 -; RV64-NEXT: .LBB50_7: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bgtz a3, .LBB50_3 -; RV64-NEXT: .LBB50_8: # %entry -; RV64-NEXT: li a1, 0 -; RV64-NEXT: bnez a3, .LBB50_4 -; RV64-NEXT: j .LBB50_5 entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -4157,107 +3802,87 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a7, 8(sp) -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a1, a4, -1 -; RV32-NEXT: mv a3, a7 -; RV32-NEXT: bne a5, a1, .LBB51_17 +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw t0, 8(sp) +; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lui a3, 524288 +; RV32-NEXT: addi a6, a3, -1 +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: beq a4, a6, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: or a6, a0, a2 -; RV32-NEXT: bnez a6, .LBB51_18 +; RV32-NEXT: sltu a2, a4, a6 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bgez a2, .LBB51_19 -; RV32-NEXT: .LBB51_3: # %entry -; RV32-NEXT: bgeu a5, a1, .LBB51_20 +; RV32-NEXT: or a7, a1, a0 +; RV32-NEXT: slti a5, a0, 0 +; RV32-NEXT: bnez a7, .LBB51_16 +; RV32-NEXT: # %bb.3: # %entry +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bgez a0, .LBB51_17 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: bnez a6, .LBB51_21 +; RV32-NEXT: bgeu a4, a6, .LBB51_18 ; RV32-NEXT: .LBB51_5: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: bnez a2, .LBB51_22 +; RV32-NEXT: beqz a7, .LBB51_7 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: bgez a2, .LBB51_23 +; RV32-NEXT: mv a4, t0 ; RV32-NEXT: .LBB51_7: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bltz a2, .LBB51_24 -; RV32-NEXT: .LBB51_8: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bltu a4, a5, .LBB51_10 +; RV32-NEXT: srai a6, a0, 31 +; RV32-NEXT: and a1, a6, a1 +; RV32-NEXT: seqz a6, a0 +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a5, a5, a0 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgez a5, .LBB51_9 +; RV32-NEXT: # %bb.8: # %entry +; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: .LBB51_9: # %entry +; RV32-NEXT: and a6, a6, a1 +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: bltu a3, a4, .LBB51_11 +; RV32-NEXT: # %bb.10: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB51_10: # %entry -; RV32-NEXT: and a6, a6, a2 +; RV32-NEXT: .LBB51_11: # %entry +; RV32-NEXT: and a6, a6, a5 ; RV32-NEXT: li a7, -1 -; RV32-NEXT: bne a6, a7, .LBB51_25 -; RV32-NEXT: # %bb.11: # %entry -; RV32-NEXT: mv t0, a3 -; RV32-NEXT: bgeu a4, a5, .LBB51_26 -; RV32-NEXT: .LBB51_12: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: bne a5, a4, .LBB51_27 +; RV32-NEXT: bne a6, a7, .LBB51_19 +; RV32-NEXT: # %bb.12: # %entry +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bne a4, a3, .LBB51_20 ; RV32-NEXT: .LBB51_13: # %entry -; 
RV32-NEXT: bltz a2, .LBB51_28 +; RV32-NEXT: beq a6, a7, .LBB51_15 ; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: beq a6, a7, .LBB51_16 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: .LBB51_15: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB51_16: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret +; RV32-NEXT: .LBB51_16: # %entry +; RV32-NEXT: addi a2, a5, -1 +; RV32-NEXT: or a2, a2, t0 +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bltz a0, .LBB51_4 ; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: sltu a3, a5, a1 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: or a3, a3, a7 -; RV32-NEXT: or a6, a0, a2 -; RV32-NEXT: beqz a6, .LBB51_2 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: bltu a4, a6, .LBB51_5 ; RV32-NEXT: .LBB51_18: # %entry -; RV32-NEXT: slti a3, a2, 0 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: or a3, a3, a7 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltz a2, .LBB51_3 +; RV32-NEXT: mv a4, a6 +; RV32-NEXT: bnez a7, .LBB51_6 +; RV32-NEXT: j .LBB51_7 ; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bltu a5, a1, .LBB51_4 -; RV32-NEXT: .LBB51_20: # %entry -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: beqz a6, .LBB51_5 -; RV32-NEXT: .LBB51_21: # %entry -; RV32-NEXT: mv a5, a7 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a2, .LBB51_6 -; RV32-NEXT: .LBB51_22: # %entry -; RV32-NEXT: srai a1, a2, 31 -; RV32-NEXT: and a6, a1, a0 -; RV32-NEXT: bltz a2, .LBB51_7 -; RV32-NEXT: .LBB51_23: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgez a2, .LBB51_8 -; RV32-NEXT: .LBB51_24: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bgeu a4, a5, .LBB51_9 -; RV32-NEXT: j .LBB51_10 -; RV32-NEXT: .LBB51_25: # %entry ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: mv t0, a3 -; RV32-NEXT: bltu a4, a5, .LBB51_12 -; RV32-NEXT: .LBB51_26: # %entry -; RV32-NEXT: li t0, 0 -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: beq a5, a4, .LBB51_13 -; RV32-NEXT: .LBB51_27: # %entry -; RV32-NEXT: mv a0, t0 -; RV32-NEXT: bgez a2, .LBB51_14 -; RV32-NEXT: .LBB51_28: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: bne a6, a7, .LBB51_15 -; RV32-NEXT: j .LBB51_16 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a4, a3, .LBB51_13 +; RV32-NEXT: .LBB51_20: # %entry +; RV32-NEXT: sltu a0, a3, a4 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: bne a6, a7, .LBB51_14 +; RV32-NEXT: j .LBB51_15 ; ; RV64-LABEL: stest_f16i64_mm: ; RV64: # %bb.0: # %entry @@ -4271,46 +3896,44 @@ ; RV64-NEXT: li a2, -1 ; RV64-NEXT: srli a4, a2, 1 ; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bgez a1, .LBB51_10 +; RV64-NEXT: bgez a1, .LBB51_9 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: bgeu a0, a4, .LBB51_11 +; RV64-NEXT: bgeu a0, a4, .LBB51_10 ; RV64-NEXT: .LBB51_2: # %entry -; RV64-NEXT: bnez a1, .LBB51_12 +; RV64-NEXT: beqz a1, .LBB51_4 ; RV64-NEXT: .LBB51_3: # %entry -; RV64-NEXT: bltz a1, .LBB51_5 +; RV64-NEXT: mv a0, a3 ; RV64-NEXT: .LBB51_4: # %entry -; RV64-NEXT: li a1, 0 -; RV64-NEXT: .LBB51_5: # %entry +; RV64-NEXT: slti a3, a1, 0 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: and a1, a3, a1 ; RV64-NEXT: slli a4, a2, 63 ; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bltz a1, .LBB51_13 -; RV64-NEXT: # %bb.6: # %entry -; RV64-NEXT: bgeu a4, a0, .LBB51_14 +; RV64-NEXT: bltz a1, .LBB51_11 +; RV64-NEXT: # %bb.5: # %entry +; RV64-NEXT: bgeu a4, a0, .LBB51_12 +; RV64-NEXT: .LBB51_6: # %entry +; RV64-NEXT: beq a1, a2, .LBB51_8 ; RV64-NEXT: .LBB51_7: # %entry -; RV64-NEXT: beq a1, a2, .LBB51_9 -; RV64-NEXT: .LBB51_8: # %entry ; RV64-NEXT: mv a0, a3 
-; RV64-NEXT: .LBB51_9: # %entry +; RV64-NEXT: .LBB51_8: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret -; RV64-NEXT: .LBB51_10: # %entry +; RV64-NEXT: .LBB51_9: # %entry ; RV64-NEXT: mv a3, a4 ; RV64-NEXT: bltu a0, a4, .LBB51_2 -; RV64-NEXT: .LBB51_11: # %entry +; RV64-NEXT: .LBB51_10: # %entry ; RV64-NEXT: mv a0, a4 -; RV64-NEXT: beqz a1, .LBB51_3 -; RV64-NEXT: .LBB51_12: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: bgez a1, .LBB51_4 -; RV64-NEXT: j .LBB51_5 -; RV64-NEXT: .LBB51_13: # %entry +; RV64-NEXT: bnez a1, .LBB51_3 +; RV64-NEXT: j .LBB51_4 +; RV64-NEXT: .LBB51_11: # %entry ; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a4, a0, .LBB51_7 -; RV64-NEXT: .LBB51_14: # %entry +; RV64-NEXT: bltu a4, a0, .LBB51_6 +; RV64-NEXT: .LBB51_12: # %entry ; RV64-NEXT: mv a0, a4 -; RV64-NEXT: bne a1, a2, .LBB51_8 -; RV64-NEXT: j .LBB51_9 +; RV64-NEXT: bne a1, a2, .LBB51_7 +; RV64-NEXT: j .LBB51_8 entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -4330,40 +3953,24 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB52_3 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: beqz a2, .LBB52_4 -; RV32-NEXT: .LBB52_2: -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: j .LBB52_5 -; RV32-NEXT: .LBB52_3: -; RV32-NEXT: seqz a2, a3 -; RV32-NEXT: bnez a2, .LBB52_2 -; RV32-NEXT: .LBB52_4: # %entry -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB52_5: # %entry -; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beqz a3, .LBB52_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: .LBB52_7: # %entry -; RV32-NEXT: bnez a2, .LBB52_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bnez a3, .LBB52_10 -; RV32-NEXT: j .LBB52_11 -; RV32-NEXT: .LBB52_9: +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: beqz a3, .LBB52_11 -; RV32-NEXT: .LBB52_10: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB52_11: # %entry +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: and a0, a1, a3 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -4377,17 +3984,13 @@ ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beqz a1, .LBB52_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: .LBB52_2: # %entry -; RV64-NEXT: li a3, 1 -; RV64-NEXT: beq a1, a3, .LBB52_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: .LBB52_4: # %entry +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -4409,109 +4012,68 @@ ; RV32-NEXT: call __extendhfsf2@plt ; 
RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB53_3 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: li a3, 1 +; RV32-NEXT: mv a6, a1 +; RV32-NEXT: bltz a0, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a0, a2, 0 -; RV32-NEXT: beqz a0, .LBB53_4 -; RV32-NEXT: .LBB53_2: -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: j .LBB53_5 -; RV32-NEXT: .LBB53_3: -; RV32-NEXT: seqz a0, a3 -; RV32-NEXT: bnez a0, .LBB53_2 +; RV32-NEXT: li a6, 1 +; RV32-NEXT: .LBB53_2: # %entry +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB53_4 +; RV32-NEXT: # %bb.3: # %entry +; RV32-NEXT: li a2, 1 ; RV32-NEXT: .LBB53_4: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB53_5: # %entry -; RV32-NEXT: xori a1, a3, 1 -; RV32-NEXT: or a4, a1, a2 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a4, .LBB53_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: mv a1, a5 +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a3, 8(sp) +; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: beqz a0, .LBB53_6 +; RV32-NEXT: # %bb.5: # %entry +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: mv a6, a4 +; RV32-NEXT: j .LBB53_7 +; RV32-NEXT: .LBB53_6: +; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB53_7: # %entry -; RV32-NEXT: bnez a0, .LBB53_9 +; RV32-NEXT: seqz a6, a6 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: and a3, a6, a3 +; RV32-NEXT: xori a1, a1, 1 +; RV32-NEXT: or a1, a1, a0 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a3, a1, a3 +; RV32-NEXT: and a5, a6, a5 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a4, a4, a0 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beqz a1, .LBB53_9 ; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: li a0, 0 -; RV32-NEXT: bnez a4, .LBB53_10 -; RV32-NEXT: j .LBB53_11 -; RV32-NEXT: .LBB53_9: -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: li a0, 0 +; RV32-NEXT: seqz a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: .LBB53_9: # %entry ; RV32-NEXT: beqz a4, .LBB53_11 -; RV32-NEXT: .LBB53_10: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: .LBB53_11: # %entry -; RV32-NEXT: li a5, 1 -; RV32-NEXT: mv a4, a3 -; RV32-NEXT: bgez a2, .LBB53_17 -; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: bgeu a3, a5, .LBB53_18 +; RV32-NEXT: # %bb.10: # %entry +; RV32-NEXT: sgtz a5, a4 +; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: bnez a2, .LBB53_12 +; RV32-NEXT: j .LBB53_13 +; RV32-NEXT: .LBB53_11: +; RV32-NEXT: snez a5, a2 +; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: beqz a2, .LBB53_13 +; RV32-NEXT: .LBB53_12: # %entry +; RV32-NEXT: seqz a0, a5 +; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: .LBB53_13: # %entry -; RV32-NEXT: bnez a2, .LBB53_19 -; RV32-NEXT: .LBB53_14: # %entry -; RV32-NEXT: bgez a2, .LBB53_20 -; RV32-NEXT: .LBB53_15: # %entry -; RV32-NEXT: beqz a2, .LBB53_21 -; RV32-NEXT: .LBB53_16: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: beqz a4, .LBB53_22 -; RV32-NEXT: j .LBB53_23 -; RV32-NEXT: .LBB53_17: # %entry -; RV32-NEXT: li a4, 1 -; RV32-NEXT: bltu a3, a5, .LBB53_13 -; RV32-NEXT: .LBB53_18: # %entry -; RV32-NEXT: li a3, 1 -; RV32-NEXT: beqz a2, .LBB53_14 -; RV32-NEXT: .LBB53_19: # %entry -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: bltz a2, .LBB53_15 -; RV32-NEXT: .LBB53_20: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bnez a2, .LBB53_16 -; RV32-NEXT: .LBB53_21: -; RV32-NEXT: snez a4, a3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bnez a4, .LBB53_23 -; RV32-NEXT: .LBB53_22: # %entry -; 
RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB53_23: # %entry -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: beqz a1, .LBB53_30 -; RV32-NEXT: # %bb.24: # %entry -; RV32-NEXT: bnez a1, .LBB53_31 -; RV32-NEXT: .LBB53_25: # %entry -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB53_32 -; RV32-NEXT: .LBB53_26: # %entry -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: beqz a4, .LBB53_33 -; RV32-NEXT: .LBB53_27: # %entry -; RV32-NEXT: beqz a2, .LBB53_29 -; RV32-NEXT: .LBB53_28: # %entry -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB53_29: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret -; RV32-NEXT: .LBB53_30: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a1, .LBB53_25 -; RV32-NEXT: .LBB53_31: # %entry -; RV32-NEXT: mv a0, a6 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: beqz a2, .LBB53_26 -; RV32-NEXT: .LBB53_32: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bnez a4, .LBB53_27 -; RV32-NEXT: .LBB53_33: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: bnez a2, .LBB53_28 -; RV32-NEXT: j .LBB53_29 ; ; RV64-LABEL: ustest_f16i64_mm: ; RV64: # %bb.0: # %entry @@ -4522,37 +4084,27 @@ ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: li a4, 1 -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: bgtz a1, .LBB53_6 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: blez a1, .LBB53_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a0, 0 -; RV64-NEXT: bne a1, a4, .LBB53_7 +; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB53_2: # %entry -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: blez a3, .LBB53_8 -; RV64-NEXT: .LBB53_3: # %entry -; RV64-NEXT: beqz a3, .LBB53_5 +; RV64-NEXT: sgtz a3, a1 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: beqz a2, .LBB53_4 +; RV64-NEXT: # %bb.3: # %entry +; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: .LBB53_4: # %entry -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB53_5: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret -; RV64-NEXT: .LBB53_6: # %entry -; RV64-NEXT: li a2, 0 -; RV64-NEXT: li a3, 1 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: beq a1, a4, .LBB53_2 -; RV64-NEXT: .LBB53_7: # %entry -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bgtz a3, .LBB53_3 -; RV64-NEXT: .LBB53_8: # %entry -; RV64-NEXT: li a1, 0 -; RV64-NEXT: bnez a3, .LBB53_4 -; RV64-NEXT: j .LBB53_5 entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -100,26 +100,20 @@ ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a2, a0, 32 ; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz -; CHECK-NOV-NEXT: bge a1, a2, .LBB2_5 +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bge a0, a2, .LBB2_6 -; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blez a0, .LBB2_7 -; CHECK-NOV-NEXT: .LBB2_3: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB2_8 -; CHECK-NOV-NEXT: .LBB2_4: # %entry -; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB2_5: # %entry ; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 -; CHECK-NOV-NEXT: .LBB2_6: # %entry +; CHECK-NOV-NEXT: .LBB2_2: # %entry +; 
CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: bgtz a0, .LBB2_3 -; CHECK-NOV-NEXT: .LBB2_7: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB2_4 -; CHECK-NOV-NEXT: .LBB2_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 +; CHECK-NOV-NEXT: .LBB2_4: # %entry +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret ; ; CHECK-V-LABEL: ustest_f64i32: @@ -283,57 +277,49 @@ ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a5, a2, 32 +; CHECK-NOV-NEXT: srli a4, a2, 32 ; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB5_10 +; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB5_11 +; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz -; CHECK-NOV-NEXT: bge a3, a5, .LBB5_12 +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry -; CHECK-NOV-NEXT: bge a4, a5, .LBB5_13 +; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 ; CHECK-NOV-NEXT: .LBB5_4: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB5_14 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB5_5: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB5_15 -; CHECK-NOV-NEXT: .LBB5_6: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB5_16 -; CHECK-NOV-NEXT: .LBB5_7: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB5_9 -; CHECK-NOV-NEXT: .LBB5_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: .LBB5_9: # %entry +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a3, a5, a3 +; CHECK-NOV-NEXT: sgtz a5, a2 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a2, a5, a2 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: sw a2, 8(a0) ; CHECK-NOV-NEXT: sw a3, 4(a0) ; CHECK-NOV-NEXT: sw a4, 0(a0) ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB5_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: .LBB5_6: # %entry +; CHECK-NOV-NEXT: mv a1, a4 ; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB5_2 -; CHECK-NOV-NEXT: .LBB5_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 -; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a5, .LBB5_3 -; CHECK-NOV-NEXT: .LBB5_12: # %entry -; CHECK-NOV-NEXT: mv a3, a5 -; CHECK-NOV-NEXT: blt a4, a5, .LBB5_4 -; CHECK-NOV-NEXT: .LBB5_13: # %entry -; CHECK-NOV-NEXT: mv a4, a5 -; CHECK-NOV-NEXT: bgtz a4, .LBB5_5 -; CHECK-NOV-NEXT: .LBB5_14: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB5_6 -; CHECK-NOV-NEXT: .LBB5_15: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB5_7 -; CHECK-NOV-NEXT: .LBB5_16: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: blez a1, .LBB5_8 -; CHECK-NOV-NEXT: j .LBB5_9 +; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 +; CHECK-NOV-NEXT: .LBB5_7: # %entry +; CHECK-NOV-NEXT: mv a2, a4 +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 +; CHECK-NOV-NEXT: .LBB5_8: # %entry +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB5_4 +; CHECK-NOV-NEXT: j .LBB5_5 ; ; CHECK-V-LABEL: 
ustest_f32i32: ; CHECK-V: # %bb.0: # %entry @@ -716,29 +702,33 @@ ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: li a1, -1 -; CHECK-NOV-NEXT: srli a3, a1, 32 -; CHECK-NOV-NEXT: bge a0, a3, .LBB8_10 +; CHECK-NOV-NEXT: srli a2, a1, 32 +; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: bge s2, a3, .LBB8_11 +; CHECK-NOV-NEXT: bge s2, a2, .LBB8_7 ; CHECK-NOV-NEXT: .LBB8_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz -; CHECK-NOV-NEXT: bge a1, a3, .LBB8_12 +; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz +; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8 ; CHECK-NOV-NEXT: .LBB8_3: # %entry -; CHECK-NOV-NEXT: bge a2, a3, .LBB8_13 +; CHECK-NOV-NEXT: blt a3, a2, .LBB8_5 ; CHECK-NOV-NEXT: .LBB8_4: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB8_14 +; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB8_5: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB8_15 -; CHECK-NOV-NEXT: .LBB8_6: # %entry -; CHECK-NOV-NEXT: blez s2, .LBB8_16 -; CHECK-NOV-NEXT: .LBB8_7: # %entry -; CHECK-NOV-NEXT: bgtz a0, .LBB8_9 -; CHECK-NOV-NEXT: .LBB8_8: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB8_9: # %entry +; CHECK-NOV-NEXT: sgtz a2, a3 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a2, a2, a3 +; CHECK-NOV-NEXT: sgtz a3, a1 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a1, a3, a1 +; CHECK-NOV-NEXT: sgtz a3, s2 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, s2 +; CHECK-NOV-NEXT: sgtz a4, a0 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw s2, 8(s0) +; CHECK-NOV-NEXT: sw a3, 8(s0) ; CHECK-NOV-NEXT: sw a1, 4(s0) ; CHECK-NOV-NEXT: sw a2, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -751,30 +741,18 @@ ; CHECK-NOV-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 64 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB8_10: # %entry -; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: .LBB8_6: # %entry +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: blt s2, a3, .LBB8_2 -; CHECK-NOV-NEXT: .LBB8_11: # %entry -; CHECK-NOV-NEXT: mv s2, a3 -; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz -; CHECK-NOV-NEXT: blt a1, a3, .LBB8_3 -; CHECK-NOV-NEXT: .LBB8_12: # %entry -; CHECK-NOV-NEXT: mv a1, a3 -; CHECK-NOV-NEXT: blt a2, a3, .LBB8_4 -; CHECK-NOV-NEXT: .LBB8_13: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bgtz a2, .LBB8_5 -; CHECK-NOV-NEXT: .LBB8_14: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB8_6 -; CHECK-NOV-NEXT: .LBB8_15: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgtz s2, .LBB8_7 -; CHECK-NOV-NEXT: .LBB8_16: # %entry -; CHECK-NOV-NEXT: li s2, 0 -; CHECK-NOV-NEXT: blez a0, .LBB8_8 -; CHECK-NOV-NEXT: j .LBB8_9 +; CHECK-NOV-NEXT: blt s2, a2, .LBB8_2 +; CHECK-NOV-NEXT: .LBB8_7: # %entry +; CHECK-NOV-NEXT: mv s2, a2 +; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3 +; CHECK-NOV-NEXT: .LBB8_8: # %entry +; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: bge a3, a2, .LBB8_4 +; CHECK-NOV-NEXT: j .LBB8_5 ; ; CHECK-V-LABEL: ustest_f16i32: ; CHECK-V: # %bb.0: # %entry @@ -946,26 +924,20 @@ ; CHECK-NOV-NEXT: lui a0, 16 ; CHECK-NOV-NEXT: addiw a2, a0, -1 ; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz -; CHECK-NOV-NEXT: bge a1, a2, .LBB11_5 +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bge a0, a2, .LBB11_6 -; CHECK-NOV-NEXT: .LBB11_2: # %entry -; 
CHECK-NOV-NEXT: blez a0, .LBB11_7 -; CHECK-NOV-NEXT: .LBB11_3: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB11_8 -; CHECK-NOV-NEXT: .LBB11_4: # %entry -; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB11_5: # %entry ; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 -; CHECK-NOV-NEXT: .LBB11_6: # %entry +; CHECK-NOV-NEXT: .LBB11_2: # %entry +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: bgtz a0, .LBB11_3 -; CHECK-NOV-NEXT: .LBB11_7: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB11_4 -; CHECK-NOV-NEXT: .LBB11_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 +; CHECK-NOV-NEXT: .LBB11_4: # %entry +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret ; ; CHECK-V-LABEL: ustest_f64i16: @@ -1130,57 +1102,49 @@ ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui a2, 16 -; CHECK-NOV-NEXT: addiw a5, a2, -1 +; CHECK-NOV-NEXT: addiw a4, a2, -1 ; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB14_10 +; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB14_11 +; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz -; CHECK-NOV-NEXT: bge a3, a5, .LBB14_12 +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry -; CHECK-NOV-NEXT: bge a4, a5, .LBB14_13 +; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 ; CHECK-NOV-NEXT: .LBB14_4: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB14_14 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB14_5: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB14_15 -; CHECK-NOV-NEXT: .LBB14_6: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB14_16 -; CHECK-NOV-NEXT: .LBB14_7: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB14_9 -; CHECK-NOV-NEXT: .LBB14_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: .LBB14_9: # %entry +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a3, a5, a3 +; CHECK-NOV-NEXT: sgtz a5, a2 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a2, a5, a2 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: sh a2, 4(a0) ; CHECK-NOV-NEXT: sh a3, 2(a0) ; CHECK-NOV-NEXT: sh a4, 0(a0) ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB14_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: .LBB14_6: # %entry +; CHECK-NOV-NEXT: mv a1, a4 ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB14_2 -; CHECK-NOV-NEXT: .LBB14_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 -; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a5, .LBB14_3 -; CHECK-NOV-NEXT: .LBB14_12: # %entry -; CHECK-NOV-NEXT: mv a3, a5 -; CHECK-NOV-NEXT: blt a4, a5, .LBB14_4 -; CHECK-NOV-NEXT: .LBB14_13: # %entry -; CHECK-NOV-NEXT: mv a4, a5 -; CHECK-NOV-NEXT: bgtz a4, .LBB14_5 -; CHECK-NOV-NEXT: .LBB14_14: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB14_6 -; CHECK-NOV-NEXT: .LBB14_15: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB14_7 -; CHECK-NOV-NEXT: .LBB14_16: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; 
CHECK-NOV-NEXT: blez a1, .LBB14_8 -; CHECK-NOV-NEXT: j .LBB14_9 +; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 +; CHECK-NOV-NEXT: .LBB14_7: # %entry +; CHECK-NOV-NEXT: mv a2, a4 +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 +; CHECK-NOV-NEXT: .LBB14_8: # %entry +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB14_4 +; CHECK-NOV-NEXT: j .LBB14_5 ; ; CHECK-V-LABEL: ustest_f32i16: ; CHECK-V: # %bb.0: # %entry @@ -1865,55 +1829,63 @@ ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a1, 16 -; CHECK-NOV-NEXT: addiw a7, a1, -1 -; CHECK-NOV-NEXT: bge a0, a7, .LBB17_18 +; CHECK-NOV-NEXT: addiw a3, a1, -1 +; CHECK-NOV-NEXT: bge a0, a3, .LBB17_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: bge s2, a7, .LBB17_19 +; CHECK-NOV-NEXT: bge s2, a3, .LBB17_11 ; CHECK-NOV-NEXT: .LBB17_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: bge a1, a7, .LBB17_20 +; CHECK-NOV-NEXT: bge a1, a3, .LBB17_12 ; CHECK-NOV-NEXT: .LBB17_3: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: bge a2, a7, .LBB17_21 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz +; CHECK-NOV-NEXT: bge a2, a3, .LBB17_13 ; CHECK-NOV-NEXT: .LBB17_4: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: bge a3, a7, .LBB17_22 +; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz +; CHECK-NOV-NEXT: bge a4, a3, .LBB17_14 ; CHECK-NOV-NEXT: .LBB17_5: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz -; CHECK-NOV-NEXT: bge a4, a7, .LBB17_23 +; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz +; CHECK-NOV-NEXT: bge a5, a3, .LBB17_15 ; CHECK-NOV-NEXT: .LBB17_6: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz -; CHECK-NOV-NEXT: bge a5, a7, .LBB17_24 +; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz +; CHECK-NOV-NEXT: bge a6, a3, .LBB17_16 ; CHECK-NOV-NEXT: .LBB17_7: # %entry -; CHECK-NOV-NEXT: bge a6, a7, .LBB17_25 +; CHECK-NOV-NEXT: blt a7, a3, .LBB17_9 ; CHECK-NOV-NEXT: .LBB17_8: # %entry -; CHECK-NOV-NEXT: blez a6, .LBB17_26 +; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB17_9: # %entry -; CHECK-NOV-NEXT: blez a5, .LBB17_27 -; CHECK-NOV-NEXT: .LBB17_10: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB17_28 -; CHECK-NOV-NEXT: .LBB17_11: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB17_29 -; CHECK-NOV-NEXT: .LBB17_12: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB17_30 -; CHECK-NOV-NEXT: .LBB17_13: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB17_31 -; CHECK-NOV-NEXT: .LBB17_14: # %entry -; CHECK-NOV-NEXT: blez s2, .LBB17_32 -; CHECK-NOV-NEXT: .LBB17_15: # %entry -; CHECK-NOV-NEXT: bgtz a0, .LBB17_17 -; CHECK-NOV-NEXT: .LBB17_16: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB17_17: # %entry +; CHECK-NOV-NEXT: sgtz a3, a7 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a7 +; CHECK-NOV-NEXT: sgtz a7, a6 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a6, a7, a6 +; CHECK-NOV-NEXT: sgtz a7, a5 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a5, a7, a5 +; CHECK-NOV-NEXT: sgtz a7, a4 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a4, a7, a4 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: sgtz a7, a1 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a1, a7, a1 +; CHECK-NOV-NEXT: sgtz a7, s2 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a7, a7, s2 +; CHECK-NOV-NEXT: sgtz t0, a0 +; CHECK-NOV-NEXT: neg t0, t0 +; CHECK-NOV-NEXT: and a0, t0, a0 ; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh s2, 12(s0) +; 
CHECK-NOV-NEXT: sh a7, 12(s0) ; CHECK-NOV-NEXT: sh a1, 10(s0) ; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a4, 4(s0) -; CHECK-NOV-NEXT: sh a5, 2(s0) -; CHECK-NOV-NEXT: sh a6, 0(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a3, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -1932,58 +1904,34 @@ ; CHECK-NOV-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 128 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB17_18: # %entry -; CHECK-NOV-NEXT: mv a0, a7 +; CHECK-NOV-NEXT: .LBB17_10: # %entry +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: blt s2, a7, .LBB17_2 -; CHECK-NOV-NEXT: .LBB17_19: # %entry -; CHECK-NOV-NEXT: mv s2, a7 +; CHECK-NOV-NEXT: blt s2, a3, .LBB17_2 +; CHECK-NOV-NEXT: .LBB17_11: # %entry +; CHECK-NOV-NEXT: mv s2, a3 ; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: blt a1, a7, .LBB17_3 -; CHECK-NOV-NEXT: .LBB17_20: # %entry -; CHECK-NOV-NEXT: mv a1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: blt a2, a7, .LBB17_4 -; CHECK-NOV-NEXT: .LBB17_21: # %entry -; CHECK-NOV-NEXT: mv a2, a7 -; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: blt a3, a7, .LBB17_5 -; CHECK-NOV-NEXT: .LBB17_22: # %entry -; CHECK-NOV-NEXT: mv a3, a7 -; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz -; CHECK-NOV-NEXT: blt a4, a7, .LBB17_6 -; CHECK-NOV-NEXT: .LBB17_23: # %entry -; CHECK-NOV-NEXT: mv a4, a7 -; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz -; CHECK-NOV-NEXT: blt a5, a7, .LBB17_7 -; CHECK-NOV-NEXT: .LBB17_24: # %entry -; CHECK-NOV-NEXT: mv a5, a7 -; CHECK-NOV-NEXT: blt a6, a7, .LBB17_8 -; CHECK-NOV-NEXT: .LBB17_25: # %entry -; CHECK-NOV-NEXT: mv a6, a7 -; CHECK-NOV-NEXT: bgtz a6, .LBB17_9 -; CHECK-NOV-NEXT: .LBB17_26: # %entry -; CHECK-NOV-NEXT: li a6, 0 -; CHECK-NOV-NEXT: bgtz a5, .LBB17_10 -; CHECK-NOV-NEXT: .LBB17_27: # %entry -; CHECK-NOV-NEXT: li a5, 0 -; CHECK-NOV-NEXT: bgtz a4, .LBB17_11 -; CHECK-NOV-NEXT: .LBB17_28: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB17_12 -; CHECK-NOV-NEXT: .LBB17_29: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB17_13 -; CHECK-NOV-NEXT: .LBB17_30: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB17_14 -; CHECK-NOV-NEXT: .LBB17_31: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgtz s2, .LBB17_15 -; CHECK-NOV-NEXT: .LBB17_32: # %entry -; CHECK-NOV-NEXT: li s2, 0 -; CHECK-NOV-NEXT: blez a0, .LBB17_16 -; CHECK-NOV-NEXT: j .LBB17_17 +; CHECK-NOV-NEXT: blt a1, a3, .LBB17_3 +; CHECK-NOV-NEXT: .LBB17_12: # %entry +; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz +; CHECK-NOV-NEXT: blt a2, a3, .LBB17_4 +; CHECK-NOV-NEXT: .LBB17_13: # %entry +; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz +; CHECK-NOV-NEXT: blt a4, a3, .LBB17_5 +; CHECK-NOV-NEXT: .LBB17_14: # %entry +; CHECK-NOV-NEXT: mv a4, a3 +; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz +; CHECK-NOV-NEXT: blt a5, a3, .LBB17_6 +; CHECK-NOV-NEXT: .LBB17_15: # %entry +; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz +; CHECK-NOV-NEXT: blt a6, a3, .LBB17_7 +; CHECK-NOV-NEXT: .LBB17_16: # %entry +; CHECK-NOV-NEXT: mv a6, a3 +; CHECK-NOV-NEXT: bge a7, a3, .LBB17_8 +; CHECK-NOV-NEXT: j .LBB17_9 ; ; CHECK-V-LABEL: ustest_f16i16: ; CHECK-V: # %bb.0: # %entry @@ -2138,6 
+2086,7 @@ ; CHECK-NOV-NEXT: bnez s1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: @@ -2145,41 +2094,43 @@ ; CHECK-NOV-NEXT: beqz s1, .LBB18_2 ; CHECK-NOV-NEXT: .LBB18_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry -; CHECK-NOV-NEXT: li s1, 0 ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry -; CHECK-NOV-NEXT: beqz a4, .LBB18_10 +; CHECK-NOV-NEXT: addi a6, a6, -1 +; CHECK-NOV-NEXT: seqz a5, a4 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: beq a1, a0, .LBB18_11 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB18_8: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 0 -; CHECK-NOV-NEXT: xori a1, a1, 1 -; CHECK-NOV-NEXT: bne s1, a0, .LBB18_12 -; CHECK-NOV-NEXT: .LBB18_9: -; CHECK-NOV-NEXT: sltu a0, a3, s0 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11 +; CHECK-NOV-NEXT: # %bb.9: # %entry +; CHECK-NOV-NEXT: slti a3, a5, 0 +; CHECK-NOV-NEXT: xori a3, a3, 1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12 +; CHECK-NOV-NEXT: .LBB18_10: +; CHECK-NOV-NEXT: sltu a0, a1, s0 ; CHECK-NOV-NEXT: beqz a0, .LBB18_13 ; CHECK-NOV-NEXT: j .LBB18_14 -; CHECK-NOV-NEXT: .LBB18_10: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: bne a1, a0, .LBB18_8 ; CHECK-NOV-NEXT: .LBB18_11: -; CHECK-NOV-NEXT: sltu a1, a3, a2 -; CHECK-NOV-NEXT: beq s1, a0, .LBB18_9 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10 ; CHECK-NOV-NEXT: .LBB18_12: # %entry -; CHECK-NOV-NEXT: slti a0, s1, 0 +; CHECK-NOV-NEXT: slti a0, a4, 0 ; CHECK-NOV-NEXT: xori a0, a0, 1 ; CHECK-NOV-NEXT: bnez a0, .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_13: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB18_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB18_16 +; CHECK-NOV-NEXT: bnez a3, .LBB18_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB18_16: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 @@ -2224,49 +2175,51 @@ ; CHECK-V-NEXT: bnez a1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: ; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: beqz a4, .LBB18_5 -; CHECK-V-NEXT: j .LBB18_6 +; CHECK-V-NEXT: j .LBB18_5 ; CHECK-V-NEXT: .LBB18_3: ; CHECK-V-NEXT: sltu a4, s0, a3 ; CHECK-V-NEXT: beqz a1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry ; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: bnez a4, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: li s1, 0 +; CHECK-V-NEXT: seqz a6, a4 +; CHECK-V-NEXT: addi a6, a6, -1 +; CHECK-V-NEXT: bnez a4, .LBB18_7 +; CHECK-V-NEXT: # %bb.6: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: .LBB18_6: # %entry -; CHECK-V-NEXT: beqz a5, .LBB18_10 -; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq s1, a2, .LBB18_11 -; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: xori a4, a4, 1 -; CHECK-V-NEXT: bne a1, a2, .LBB18_12 -; CHECK-V-NEXT: .LBB18_9: -; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: beqz a4, .LBB18_13 -; CHECK-V-NEXT: j .LBB18_14 -; CHECK-V-NEXT: .LBB18_10: # %entry -; CHECK-V-NEXT: li 
a1, 0 +; CHECK-V-NEXT: .LBB18_7: # %entry +; CHECK-V-NEXT: and a6, a6, s1 +; CHECK-V-NEXT: seqz a4, a5 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: bnez a5, .LBB18_9 +; CHECK-V-NEXT: # %bb.8: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: bne s1, a2, .LBB18_8 +; CHECK-V-NEXT: .LBB18_9: # %entry +; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: slli a1, a2, 63 +; CHECK-V-NEXT: beq a6, a2, .LBB18_12 +; CHECK-V-NEXT: # %bb.10: # %entry +; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: xori a3, a3, 1 +; CHECK-V-NEXT: bne a4, a2, .LBB18_13 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a4, a3, s0 -; CHECK-V-NEXT: beq a1, a2, .LBB18_9 -; CHECK-V-NEXT: .LBB18_12: # %entry -; CHECK-V-NEXT: slti a1, a1, 0 -; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: bnez a4, .LBB18_14 +; CHECK-V-NEXT: sltu a2, a1, a0 +; CHECK-V-NEXT: beqz a3, .LBB18_14 +; CHECK-V-NEXT: j .LBB18_15 +; CHECK-V-NEXT: .LBB18_12: +; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: beq a4, a2, .LBB18_11 ; CHECK-V-NEXT: .LBB18_13: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: slti a2, a4, 0 +; CHECK-V-NEXT: xori a2, a2, 1 +; CHECK-V-NEXT: bnez a3, .LBB18_15 ; CHECK-V-NEXT: .LBB18_14: # %entry -; CHECK-V-NEXT: bnez a1, .LBB18_16 -; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB18_16: # %entry +; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: .LBB18_15: # %entry +; CHECK-V-NEXT: bnez a2, .LBB18_17 +; CHECK-V-NEXT: # %bb.16: # %entry +; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: .LBB18_17: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -2314,15 +2267,12 @@ ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt -; CHECK-NOV-NEXT: beqz a1, .LBB19_2 -; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB19_2: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB19_4 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB19_4: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2356,16 +2306,14 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: beqz s1, .LBB19_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB19_2: # %entry -; CHECK-V-NEXT: beqz a1, .LBB19_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB19_4: # %entry +; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -2410,50 +2358,41 @@ ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB20_7 +; CHECK-NOV-NEXT: blez a1, .LBB20_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB20_8 
+; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB20_9 -; CHECK-NOV-NEXT: .LBB20_3: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB20_10 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB20_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB20_4: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB20_11 -; CHECK-NOV-NEXT: .LBB20_5: # %entry -; CHECK-NOV-NEXT: sgtz a1, a3 -; CHECK-NOV-NEXT: bnez a2, .LBB20_12 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: addi a4, a1, -1 +; CHECK-NOV-NEXT: sgtz a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: beqz a3, .LBB20_7 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a3, a3 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: bnez a2, .LBB20_8 ; CHECK-NOV-NEXT: .LBB20_6: ; CHECK-NOV-NEXT: snez a2, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB20_13 -; CHECK-NOV-NEXT: j .LBB20_14 -; CHECK-NOV-NEXT: .LBB20_7: # %entry -; CHECK-NOV-NEXT: li a2, 1 -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB20_2 -; CHECK-NOV-NEXT: .LBB20_8: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: blez a1, .LBB20_3 -; CHECK-NOV-NEXT: .LBB20_9: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: blez s1, .LBB20_4 -; CHECK-NOV-NEXT: .LBB20_10: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB20_5 -; CHECK-NOV-NEXT: .LBB20_11: -; CHECK-NOV-NEXT: snez a1, s0 +; CHECK-NOV-NEXT: j .LBB20_9 +; CHECK-NOV-NEXT: .LBB20_7: +; CHECK-NOV-NEXT: snez a3, a1 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6 -; CHECK-NOV-NEXT: .LBB20_12: # %entry +; CHECK-NOV-NEXT: .LBB20_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: bnez a2, .LBB20_14 -; CHECK-NOV-NEXT: .LBB20_13: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB20_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB20_16 -; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB20_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: .LBB20_9: # %entry +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: seqz a2, a3 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2480,53 +2419,50 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv s1, a1 +; CHECK-V-NEXT: mv s1, a0 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 48 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB20_6 +; CHECK-V-NEXT: blez a1, .LBB20_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgtz s1, .LBB20_7 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB20_8 -; CHECK-V-NEXT: .LBB20_3: # %entry -; CHECK-V-NEXT: beqz a2, .LBB20_9 +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: blez s0, .LBB20_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: beqz a2, .LBB20_7 
+; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: bnez s1, .LBB20_10 -; CHECK-V-NEXT: .LBB20_5: -; CHECK-V-NEXT: snez a2, s0 -; CHECK-V-NEXT: beqz a2, .LBB20_11 -; CHECK-V-NEXT: j .LBB20_12 -; CHECK-V-NEXT: .LBB20_6: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: blez s1, .LBB20_2 -; CHECK-V-NEXT: .LBB20_7: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: li s1, 1 -; CHECK-V-NEXT: blez a1, .LBB20_3 -; CHECK-V-NEXT: .LBB20_8: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: bnez a2, .LBB20_4 -; CHECK-V-NEXT: .LBB20_9: +; CHECK-V-NEXT: and a2, a3, s1 +; CHECK-V-NEXT: bnez s0, .LBB20_8 +; CHECK-V-NEXT: .LBB20_6: +; CHECK-V-NEXT: snez a3, a2 +; CHECK-V-NEXT: j .LBB20_9 +; CHECK-V-NEXT: .LBB20_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: beqz s1, .LBB20_5 -; CHECK-V-NEXT: .LBB20_10: # %entry -; CHECK-V-NEXT: sgtz a2, s1 -; CHECK-V-NEXT: bnez a2, .LBB20_12 -; CHECK-V-NEXT: .LBB20_11: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB20_12: # %entry -; CHECK-V-NEXT: bnez a1, .LBB20_14 -; CHECK-V-NEXT: # %bb.13: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB20_14: # %entry +; CHECK-V-NEXT: and a2, a3, s1 +; CHECK-V-NEXT: beqz s0, .LBB20_6 +; CHECK-V-NEXT: .LBB20_8: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: .LBB20_9: # %entry +; CHECK-V-NEXT: seqz a3, a3 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -2580,6 +2516,7 @@ ; CHECK-NOV-NEXT: bnez s1, .LBB21_4 ; CHECK-NOV-NEXT: .LBB21_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB21_5 ; CHECK-NOV-NEXT: j .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_3: @@ -2587,41 +2524,43 @@ ; CHECK-NOV-NEXT: beqz s1, .LBB21_2 ; CHECK-NOV-NEXT: .LBB21_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_5: # %entry -; CHECK-NOV-NEXT: li s1, 0 ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB21_6: # %entry -; CHECK-NOV-NEXT: beqz a4, .LBB21_10 +; CHECK-NOV-NEXT: addi a6, a6, -1 +; CHECK-NOV-NEXT: seqz a5, a4 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: bnez a4, .LBB21_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: beq a1, a0, .LBB21_11 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB21_8: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 0 -; CHECK-NOV-NEXT: xori a1, a1, 1 -; CHECK-NOV-NEXT: bne s1, a0, .LBB21_12 -; CHECK-NOV-NEXT: .LBB21_9: -; CHECK-NOV-NEXT: sltu a0, a3, s0 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11 +; CHECK-NOV-NEXT: # %bb.9: # %entry +; CHECK-NOV-NEXT: slti a3, a5, 0 +; CHECK-NOV-NEXT: xori a3, a3, 1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12 +; CHECK-NOV-NEXT: .LBB21_10: +; CHECK-NOV-NEXT: sltu a0, a1, s0 ; CHECK-NOV-NEXT: beqz a0, .LBB21_13 ; CHECK-NOV-NEXT: j .LBB21_14 -; CHECK-NOV-NEXT: .LBB21_10: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: bne a1, a0, .LBB21_8 ; CHECK-NOV-NEXT: .LBB21_11: -; CHECK-NOV-NEXT: sltu a1, a3, a2 -; CHECK-NOV-NEXT: beq s1, a0, .LBB21_9 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: 
beq a4, a0, .LBB21_10 ; CHECK-NOV-NEXT: .LBB21_12: # %entry -; CHECK-NOV-NEXT: slti a0, s1, 0 +; CHECK-NOV-NEXT: slti a0, a4, 0 ; CHECK-NOV-NEXT: xori a0, a0, 1 ; CHECK-NOV-NEXT: bnez a0, .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_13: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB21_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB21_16 +; CHECK-NOV-NEXT: bnez a3, .LBB21_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB21_16: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 @@ -2666,49 +2605,51 @@ ; CHECK-V-NEXT: bnez a1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: ; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: beqz a4, .LBB21_5 -; CHECK-V-NEXT: j .LBB21_6 +; CHECK-V-NEXT: j .LBB21_5 ; CHECK-V-NEXT: .LBB21_3: ; CHECK-V-NEXT: sltu a4, s0, a3 ; CHECK-V-NEXT: beqz a1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry ; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: bnez a4, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry -; CHECK-V-NEXT: li s1, 0 +; CHECK-V-NEXT: seqz a6, a4 +; CHECK-V-NEXT: addi a6, a6, -1 +; CHECK-V-NEXT: bnez a4, .LBB21_7 +; CHECK-V-NEXT: # %bb.6: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: .LBB21_6: # %entry -; CHECK-V-NEXT: beqz a5, .LBB21_10 -; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq s1, a2, .LBB21_11 -; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: xori a4, a4, 1 -; CHECK-V-NEXT: bne a1, a2, .LBB21_12 -; CHECK-V-NEXT: .LBB21_9: -; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: beqz a4, .LBB21_13 -; CHECK-V-NEXT: j .LBB21_14 -; CHECK-V-NEXT: .LBB21_10: # %entry -; CHECK-V-NEXT: li a1, 0 +; CHECK-V-NEXT: .LBB21_7: # %entry +; CHECK-V-NEXT: and a6, a6, s1 +; CHECK-V-NEXT: seqz a4, a5 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: bnez a5, .LBB21_9 +; CHECK-V-NEXT: # %bb.8: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: bne s1, a2, .LBB21_8 +; CHECK-V-NEXT: .LBB21_9: # %entry +; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: slli a1, a2, 63 +; CHECK-V-NEXT: beq a6, a2, .LBB21_12 +; CHECK-V-NEXT: # %bb.10: # %entry +; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: xori a3, a3, 1 +; CHECK-V-NEXT: bne a4, a2, .LBB21_13 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a4, a3, s0 -; CHECK-V-NEXT: beq a1, a2, .LBB21_9 -; CHECK-V-NEXT: .LBB21_12: # %entry -; CHECK-V-NEXT: slti a1, a1, 0 -; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: bnez a4, .LBB21_14 +; CHECK-V-NEXT: sltu a2, a1, a0 +; CHECK-V-NEXT: beqz a3, .LBB21_14 +; CHECK-V-NEXT: j .LBB21_15 +; CHECK-V-NEXT: .LBB21_12: +; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: beq a4, a2, .LBB21_11 ; CHECK-V-NEXT: .LBB21_13: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: slti a2, a4, 0 +; CHECK-V-NEXT: xori a2, a2, 1 +; CHECK-V-NEXT: bnez a3, .LBB21_15 ; CHECK-V-NEXT: .LBB21_14: # %entry -; CHECK-V-NEXT: bnez a1, .LBB21_16 -; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB21_16: # %entry +; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: .LBB21_15: # %entry +; CHECK-V-NEXT: bnez a2, .LBB21_17 +; CHECK-V-NEXT: # %bb.16: # %entry +; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: .LBB21_17: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -2756,15 +2697,12 @@ ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: beqz a1, .LBB22_2 -; CHECK-NOV-NEXT: # %bb.1: # 
%entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB22_2: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB22_4 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB22_4: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2798,16 +2736,14 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: beqz s1, .LBB22_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB22_2: # %entry -; CHECK-V-NEXT: beqz a1, .LBB22_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB22_4: # %entry +; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -2852,50 +2788,41 @@ ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB23_7 +; CHECK-NOV-NEXT: blez a1, .LBB23_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB23_8 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB23_2: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB23_9 -; CHECK-NOV-NEXT: .LBB23_3: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB23_10 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB23_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB23_4: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB23_11 -; CHECK-NOV-NEXT: .LBB23_5: # %entry -; CHECK-NOV-NEXT: sgtz a1, a3 -; CHECK-NOV-NEXT: bnez a2, .LBB23_12 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: addi a4, a1, -1 +; CHECK-NOV-NEXT: sgtz a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: beqz a3, .LBB23_7 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a3, a3 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: bnez a2, .LBB23_8 ; CHECK-NOV-NEXT: .LBB23_6: ; CHECK-NOV-NEXT: snez a2, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB23_13 -; CHECK-NOV-NEXT: j .LBB23_14 -; CHECK-NOV-NEXT: .LBB23_7: # %entry -; CHECK-NOV-NEXT: li a2, 1 -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB23_2 -; CHECK-NOV-NEXT: .LBB23_8: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: blez a1, .LBB23_3 -; CHECK-NOV-NEXT: .LBB23_9: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: blez s1, .LBB23_4 -; CHECK-NOV-NEXT: .LBB23_10: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB23_5 -; CHECK-NOV-NEXT: .LBB23_11: -; CHECK-NOV-NEXT: snez a1, s0 +; CHECK-NOV-NEXT: j .LBB23_9 +; CHECK-NOV-NEXT: .LBB23_7: +; CHECK-NOV-NEXT: snez a3, a1 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6 -; CHECK-NOV-NEXT: .LBB23_12: # %entry +; CHECK-NOV-NEXT: .LBB23_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: bnez a2, .LBB23_14 -; CHECK-NOV-NEXT: .LBB23_13: # %entry -; CHECK-NOV-NEXT: li a0, 0 
-; CHECK-NOV-NEXT: .LBB23_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB23_16 -; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB23_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: .LBB23_9: # %entry +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: seqz a2, a3 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2922,53 +2849,50 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv s1, a1 +; CHECK-V-NEXT: mv s1, a0 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 48 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB23_6 +; CHECK-V-NEXT: blez a1, .LBB23_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgtz s1, .LBB23_7 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB23_8 -; CHECK-V-NEXT: .LBB23_3: # %entry -; CHECK-V-NEXT: beqz a2, .LBB23_9 +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: blez s0, .LBB23_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: beqz a2, .LBB23_7 +; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: bnez s1, .LBB23_10 -; CHECK-V-NEXT: .LBB23_5: -; CHECK-V-NEXT: snez a2, s0 -; CHECK-V-NEXT: beqz a2, .LBB23_11 -; CHECK-V-NEXT: j .LBB23_12 -; CHECK-V-NEXT: .LBB23_6: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: blez s1, .LBB23_2 -; CHECK-V-NEXT: .LBB23_7: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: li s1, 1 -; CHECK-V-NEXT: blez a1, .LBB23_3 -; CHECK-V-NEXT: .LBB23_8: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: bnez a2, .LBB23_4 -; CHECK-V-NEXT: .LBB23_9: +; CHECK-V-NEXT: and a2, a3, s1 +; CHECK-V-NEXT: bnez s0, .LBB23_8 +; CHECK-V-NEXT: .LBB23_6: +; CHECK-V-NEXT: snez a3, a2 +; CHECK-V-NEXT: j .LBB23_9 +; CHECK-V-NEXT: .LBB23_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: beqz s1, .LBB23_5 -; CHECK-V-NEXT: .LBB23_10: # %entry -; CHECK-V-NEXT: sgtz a2, s1 -; CHECK-V-NEXT: bnez a2, .LBB23_12 -; CHECK-V-NEXT: .LBB23_11: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB23_12: # %entry -; CHECK-V-NEXT: bnez a1, .LBB23_14 -; CHECK-V-NEXT: # %bb.13: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB23_14: # %entry +; CHECK-V-NEXT: and a2, a3, s1 +; CHECK-V-NEXT: beqz s0, .LBB23_6 +; CHECK-V-NEXT: .LBB23_8: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: .LBB23_9: # %entry +; CHECK-V-NEXT: seqz a3, a3 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -3024,6 +2948,7 @@ ; CHECK-NOV-NEXT: bnez s1, .LBB24_4 ; CHECK-NOV-NEXT: .LBB24_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 +; 
CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5 ; CHECK-NOV-NEXT: j .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_3: @@ -3031,41 +2956,43 @@ ; CHECK-NOV-NEXT: beqz s1, .LBB24_2 ; CHECK-NOV-NEXT: .LBB24_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_5: # %entry -; CHECK-NOV-NEXT: li s1, 0 ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB24_6: # %entry -; CHECK-NOV-NEXT: beqz a4, .LBB24_10 +; CHECK-NOV-NEXT: addi a6, a6, -1 +; CHECK-NOV-NEXT: seqz a5, a4 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: bnez a4, .LBB24_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: beq a1, a0, .LBB24_11 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB24_8: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 0 -; CHECK-NOV-NEXT: xori a1, a1, 1 -; CHECK-NOV-NEXT: bne s1, a0, .LBB24_12 -; CHECK-NOV-NEXT: .LBB24_9: -; CHECK-NOV-NEXT: sltu a0, a3, s0 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11 +; CHECK-NOV-NEXT: # %bb.9: # %entry +; CHECK-NOV-NEXT: slti a3, a5, 0 +; CHECK-NOV-NEXT: xori a3, a3, 1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12 +; CHECK-NOV-NEXT: .LBB24_10: +; CHECK-NOV-NEXT: sltu a0, a1, s0 ; CHECK-NOV-NEXT: beqz a0, .LBB24_13 ; CHECK-NOV-NEXT: j .LBB24_14 -; CHECK-NOV-NEXT: .LBB24_10: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: bne a1, a0, .LBB24_8 ; CHECK-NOV-NEXT: .LBB24_11: -; CHECK-NOV-NEXT: sltu a1, a3, a2 -; CHECK-NOV-NEXT: beq s1, a0, .LBB24_9 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10 ; CHECK-NOV-NEXT: .LBB24_12: # %entry -; CHECK-NOV-NEXT: slti a0, s1, 0 +; CHECK-NOV-NEXT: slti a0, a4, 0 ; CHECK-NOV-NEXT: xori a0, a0, 1 ; CHECK-NOV-NEXT: bnez a0, .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_13: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB24_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB24_16 +; CHECK-NOV-NEXT: bnez a3, .LBB24_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB24_16: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 @@ -3104,6 +3031,7 @@ ; CHECK-V-NEXT: bnez s1, .LBB24_4 ; CHECK-V-NEXT: .LBB24_2: ; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: seqz a6, a5 ; CHECK-V-NEXT: beqz a5, .LBB24_5 ; CHECK-V-NEXT: j .LBB24_6 ; CHECK-V-NEXT: .LBB24_3: @@ -3111,41 +3039,43 @@ ; CHECK-V-NEXT: beqz s1, .LBB24_2 ; CHECK-V-NEXT: .LBB24_4: # %entry ; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: seqz a6, a5 ; CHECK-V-NEXT: bnez a5, .LBB24_6 ; CHECK-V-NEXT: .LBB24_5: # %entry -; CHECK-V-NEXT: li s1, 0 ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB24_6: # %entry -; CHECK-V-NEXT: beqz a4, .LBB24_10 +; CHECK-V-NEXT: addi a6, a6, -1 +; CHECK-V-NEXT: seqz a5, a4 +; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: bnez a4, .LBB24_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq a1, a2, .LBB24_11 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB24_8: # %entry -; CHECK-V-NEXT: slti a1, a1, 0 -; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: bne s1, a2, .LBB24_12 -; CHECK-V-NEXT: .LBB24_9: -; CHECK-V-NEXT: sltu a2, a3, s0 +; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: slli a1, a2, 63 +; CHECK-V-NEXT: beq a5, a2, .LBB24_11 +; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: slti 
a3, a5, 0 +; CHECK-V-NEXT: xori a3, a3, 1 +; CHECK-V-NEXT: bne a4, a2, .LBB24_12 +; CHECK-V-NEXT: .LBB24_10: +; CHECK-V-NEXT: sltu a2, a1, s0 ; CHECK-V-NEXT: beqz a2, .LBB24_13 ; CHECK-V-NEXT: j .LBB24_14 -; CHECK-V-NEXT: .LBB24_10: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: bne a1, a2, .LBB24_8 ; CHECK-V-NEXT: .LBB24_11: -; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: beq s1, a2, .LBB24_9 +; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: beq a4, a2, .LBB24_10 ; CHECK-V-NEXT: .LBB24_12: # %entry -; CHECK-V-NEXT: slti a2, s1, 0 +; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB24_14 ; CHECK-V-NEXT: .LBB24_13: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB24_14: # %entry -; CHECK-V-NEXT: bnez a1, .LBB24_16 +; CHECK-V-NEXT: bnez a3, .LBB24_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB24_16: # %entry ; CHECK-V-NEXT: sd a0, 8(sp) ; CHECK-V-NEXT: sd s0, 0(sp) @@ -3185,24 +3115,21 @@ ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a0 +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: mv s1, a1 -; CHECK-NOV-NEXT: mv a0, s2 +; CHECK-NOV-NEXT: mv s1, a0 +; CHECK-NOV-NEXT: mv s2, a1 +; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: beqz a1, .LBB25_2 -; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB25_2: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB25_4 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB25_4: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s2 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3222,24 +3149,22 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a0 +; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: mv a0, s2 +; CHECK-V-NEXT: mv s1, a0 +; CHECK-V-NEXT: mv s2, a1 +; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: beqz a1, .LBB25_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB25_2: # %entry -; CHECK-V-NEXT: beqz s1, .LBB25_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB25_4: # %entry -; CHECK-V-NEXT: sd s0, 8(sp) +; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s2 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s1 +; CHECK-V-NEXT: sd a1, 8(sp) ; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -3285,50 +3210,41 @@ ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call 
__fixsfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB26_7 +; CHECK-NOV-NEXT: blez a1, .LBB26_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB26_8 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB26_2: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB26_9 -; CHECK-NOV-NEXT: .LBB26_3: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB26_10 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB26_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB26_4: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB26_11 -; CHECK-NOV-NEXT: .LBB26_5: # %entry -; CHECK-NOV-NEXT: sgtz a1, a3 -; CHECK-NOV-NEXT: bnez a2, .LBB26_12 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: addi a4, a1, -1 +; CHECK-NOV-NEXT: sgtz a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: beqz a3, .LBB26_7 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a3, a3 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: bnez a2, .LBB26_8 ; CHECK-NOV-NEXT: .LBB26_6: ; CHECK-NOV-NEXT: snez a2, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB26_13 -; CHECK-NOV-NEXT: j .LBB26_14 -; CHECK-NOV-NEXT: .LBB26_7: # %entry -; CHECK-NOV-NEXT: li a2, 1 -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB26_2 -; CHECK-NOV-NEXT: .LBB26_8: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: blez a1, .LBB26_3 -; CHECK-NOV-NEXT: .LBB26_9: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: blez s1, .LBB26_4 -; CHECK-NOV-NEXT: .LBB26_10: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB26_5 -; CHECK-NOV-NEXT: .LBB26_11: -; CHECK-NOV-NEXT: snez a1, s0 +; CHECK-NOV-NEXT: j .LBB26_9 +; CHECK-NOV-NEXT: .LBB26_7: +; CHECK-NOV-NEXT: snez a3, a1 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6 -; CHECK-NOV-NEXT: .LBB26_12: # %entry +; CHECK-NOV-NEXT: .LBB26_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: bnez a2, .LBB26_14 -; CHECK-NOV-NEXT: .LBB26_13: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB26_14: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB26_16 -; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: .LBB26_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: .LBB26_9: # %entry +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: seqz a2, a3 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3358,50 +3274,42 @@ ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB26_7 +; CHECK-V-NEXT: blez a1, .LBB26_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: mv a3, s1 -; CHECK-V-NEXT: bgtz s1, .LBB26_8 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB26_2: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB26_9 -; CHECK-V-NEXT: .LBB26_3: # %entry -; CHECK-V-NEXT: bgtz s1, .LBB26_10 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB26_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB26_4: # %entry -; CHECK-V-NEXT: beqz a3, .LBB26_11 -; CHECK-V-NEXT: .LBB26_5: # %entry -; CHECK-V-NEXT: sgtz a1, a3 -; CHECK-V-NEXT: bnez a2, .LBB26_12 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: addi a4, a1, -1 +; CHECK-V-NEXT: sgtz a1, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; 
CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: beqz a3, .LBB26_7 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: sgtz a3, a3 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: bnez a2, .LBB26_8 ; CHECK-V-NEXT: .LBB26_6: ; CHECK-V-NEXT: snez a2, a0 -; CHECK-V-NEXT: beqz a2, .LBB26_13 -; CHECK-V-NEXT: j .LBB26_14 -; CHECK-V-NEXT: .LBB26_7: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: mv a3, s1 -; CHECK-V-NEXT: blez s1, .LBB26_2 -; CHECK-V-NEXT: .LBB26_8: # %entry -; CHECK-V-NEXT: li a3, 1 -; CHECK-V-NEXT: blez a1, .LBB26_3 -; CHECK-V-NEXT: .LBB26_9: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: blez s1, .LBB26_4 -; CHECK-V-NEXT: .LBB26_10: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: bnez a3, .LBB26_5 -; CHECK-V-NEXT: .LBB26_11: -; CHECK-V-NEXT: snez a1, s0 +; CHECK-V-NEXT: j .LBB26_9 +; CHECK-V-NEXT: .LBB26_7: +; CHECK-V-NEXT: snez a3, a1 +; CHECK-V-NEXT: and a0, a4, a0 ; CHECK-V-NEXT: beqz a2, .LBB26_6 -; CHECK-V-NEXT: .LBB26_12: # %entry +; CHECK-V-NEXT: .LBB26_8: # %entry ; CHECK-V-NEXT: sgtz a2, a2 -; CHECK-V-NEXT: bnez a2, .LBB26_14 -; CHECK-V-NEXT: .LBB26_13: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: .LBB26_14: # %entry -; CHECK-V-NEXT: bnez a1, .LBB26_16 -; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: .LBB26_16: # %entry -; CHECK-V-NEXT: sd s0, 8(sp) +; CHECK-V-NEXT: .LBB26_9: # %entry +; CHECK-V-NEXT: seqz a2, a2 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: seqz a2, a3 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a1, a2, a1 +; CHECK-V-NEXT: sd a1, 8(sp) ; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -3521,26 +3429,20 @@ ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a2, a0, 32 ; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz -; CHECK-NOV-NEXT: bge a1, a2, .LBB29_5 +; CHECK-NOV-NEXT: blt a1, a2, .LBB29_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bge a0, a2, .LBB29_6 -; CHECK-NOV-NEXT: .LBB29_2: # %entry -; CHECK-NOV-NEXT: blez a0, .LBB29_7 -; CHECK-NOV-NEXT: .LBB29_3: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB29_8 -; CHECK-NOV-NEXT: .LBB29_4: # %entry -; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB29_5: # %entry ; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: blt a0, a2, .LBB29_2 -; CHECK-NOV-NEXT: .LBB29_6: # %entry +; CHECK-NOV-NEXT: .LBB29_2: # %entry +; CHECK-NOV-NEXT: blt a0, a2, .LBB29_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: bgtz a0, .LBB29_3 -; CHECK-NOV-NEXT: .LBB29_7: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB29_4 -; CHECK-NOV-NEXT: .LBB29_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 +; CHECK-NOV-NEXT: .LBB29_4: # %entry +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret ; ; CHECK-V-LABEL: ustest_f64i32_mm: @@ -3699,57 +3601,49 @@ ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a5, a2, 32 +; CHECK-NOV-NEXT: srli a4, a2, 32 ; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB32_10 +; CHECK-NOV-NEXT: bge a1, a4, .LBB32_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB32_11 +; CHECK-NOV-NEXT: bge a2, a4, .LBB32_7 ; CHECK-NOV-NEXT: .LBB32_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz -; CHECK-NOV-NEXT: bge a3, 
a5, .LBB32_12 +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: bge a3, a4, .LBB32_8 ; CHECK-NOV-NEXT: .LBB32_3: # %entry -; CHECK-NOV-NEXT: bge a4, a5, .LBB32_13 +; CHECK-NOV-NEXT: blt a5, a4, .LBB32_5 ; CHECK-NOV-NEXT: .LBB32_4: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB32_14 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB32_5: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB32_15 -; CHECK-NOV-NEXT: .LBB32_6: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB32_16 -; CHECK-NOV-NEXT: .LBB32_7: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB32_9 -; CHECK-NOV-NEXT: .LBB32_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: .LBB32_9: # %entry +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a3, a5, a3 +; CHECK-NOV-NEXT: sgtz a5, a2 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a2, a5, a2 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: sw a2, 8(a0) ; CHECK-NOV-NEXT: sw a3, 4(a0) ; CHECK-NOV-NEXT: sw a4, 0(a0) ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB32_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: .LBB32_6: # %entry +; CHECK-NOV-NEXT: mv a1, a4 ; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB32_2 -; CHECK-NOV-NEXT: .LBB32_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 -; CHECK-NOV-NEXT: fcvt.l.s a4, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a5, .LBB32_3 -; CHECK-NOV-NEXT: .LBB32_12: # %entry -; CHECK-NOV-NEXT: mv a3, a5 -; CHECK-NOV-NEXT: blt a4, a5, .LBB32_4 -; CHECK-NOV-NEXT: .LBB32_13: # %entry -; CHECK-NOV-NEXT: mv a4, a5 -; CHECK-NOV-NEXT: bgtz a4, .LBB32_5 -; CHECK-NOV-NEXT: .LBB32_14: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB32_6 -; CHECK-NOV-NEXT: .LBB32_15: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB32_7 -; CHECK-NOV-NEXT: .LBB32_16: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: blez a1, .LBB32_8 -; CHECK-NOV-NEXT: j .LBB32_9 +; CHECK-NOV-NEXT: blt a2, a4, .LBB32_2 +; CHECK-NOV-NEXT: .LBB32_7: # %entry +; CHECK-NOV-NEXT: mv a2, a4 +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: blt a3, a4, .LBB32_3 +; CHECK-NOV-NEXT: .LBB32_8: # %entry +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB32_4 +; CHECK-NOV-NEXT: j .LBB32_5 ; ; CHECK-V-LABEL: ustest_f32i32_mm: ; CHECK-V: # %bb.0: # %entry @@ -4127,29 +4021,33 @@ ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: li a1, -1 -; CHECK-NOV-NEXT: srli a3, a1, 32 -; CHECK-NOV-NEXT: bge a0, a3, .LBB35_10 +; CHECK-NOV-NEXT: srli a2, a1, 32 +; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: bge s2, a3, .LBB35_11 +; CHECK-NOV-NEXT: bge s2, a2, .LBB35_7 ; CHECK-NOV-NEXT: .LBB35_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz -; CHECK-NOV-NEXT: bge a1, a3, .LBB35_12 +; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz +; CHECK-NOV-NEXT: bge a1, a2, .LBB35_8 ; CHECK-NOV-NEXT: .LBB35_3: # %entry -; CHECK-NOV-NEXT: bge a2, a3, .LBB35_13 +; CHECK-NOV-NEXT: blt a3, a2, .LBB35_5 ; CHECK-NOV-NEXT: .LBB35_4: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB35_14 +; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB35_5: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB35_15 -; CHECK-NOV-NEXT: .LBB35_6: # %entry -; CHECK-NOV-NEXT: blez s2, .LBB35_16 -; CHECK-NOV-NEXT: .LBB35_7: # %entry -; CHECK-NOV-NEXT: 
bgtz a0, .LBB35_9 -; CHECK-NOV-NEXT: .LBB35_8: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB35_9: # %entry +; CHECK-NOV-NEXT: sgtz a2, a3 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a2, a2, a3 +; CHECK-NOV-NEXT: sgtz a3, a1 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a1, a3, a1 +; CHECK-NOV-NEXT: sgtz a3, s2 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, s2 +; CHECK-NOV-NEXT: sgtz a4, a0 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw s2, 8(s0) +; CHECK-NOV-NEXT: sw a3, 8(s0) ; CHECK-NOV-NEXT: sw a1, 4(s0) ; CHECK-NOV-NEXT: sw a2, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -4162,30 +4060,18 @@ ; CHECK-NOV-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 64 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB35_10: # %entry -; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: .LBB35_6: # %entry +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: blt s2, a3, .LBB35_2 -; CHECK-NOV-NEXT: .LBB35_11: # %entry -; CHECK-NOV-NEXT: mv s2, a3 -; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz -; CHECK-NOV-NEXT: blt a1, a3, .LBB35_3 -; CHECK-NOV-NEXT: .LBB35_12: # %entry -; CHECK-NOV-NEXT: mv a1, a3 -; CHECK-NOV-NEXT: blt a2, a3, .LBB35_4 -; CHECK-NOV-NEXT: .LBB35_13: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bgtz a2, .LBB35_5 -; CHECK-NOV-NEXT: .LBB35_14: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB35_6 -; CHECK-NOV-NEXT: .LBB35_15: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgtz s2, .LBB35_7 -; CHECK-NOV-NEXT: .LBB35_16: # %entry -; CHECK-NOV-NEXT: li s2, 0 -; CHECK-NOV-NEXT: blez a0, .LBB35_8 -; CHECK-NOV-NEXT: j .LBB35_9 +; CHECK-NOV-NEXT: blt s2, a2, .LBB35_2 +; CHECK-NOV-NEXT: .LBB35_7: # %entry +; CHECK-NOV-NEXT: mv s2, a2 +; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB35_3 +; CHECK-NOV-NEXT: .LBB35_8: # %entry +; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: bge a3, a2, .LBB35_4 +; CHECK-NOV-NEXT: j .LBB35_5 ; ; CHECK-V-LABEL: ustest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry @@ -4352,26 +4238,20 @@ ; CHECK-NOV-NEXT: lui a0, 16 ; CHECK-NOV-NEXT: addiw a2, a0, -1 ; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz -; CHECK-NOV-NEXT: bge a1, a2, .LBB38_5 +; CHECK-NOV-NEXT: blt a1, a2, .LBB38_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bge a0, a2, .LBB38_6 -; CHECK-NOV-NEXT: .LBB38_2: # %entry -; CHECK-NOV-NEXT: blez a0, .LBB38_7 -; CHECK-NOV-NEXT: .LBB38_3: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB38_8 -; CHECK-NOV-NEXT: .LBB38_4: # %entry -; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB38_5: # %entry ; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: blt a0, a2, .LBB38_2 -; CHECK-NOV-NEXT: .LBB38_6: # %entry +; CHECK-NOV-NEXT: .LBB38_2: # %entry +; CHECK-NOV-NEXT: blt a0, a2, .LBB38_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: bgtz a0, .LBB38_3 -; CHECK-NOV-NEXT: .LBB38_7: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB38_4 -; CHECK-NOV-NEXT: .LBB38_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 +; CHECK-NOV-NEXT: .LBB38_4: # %entry +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret ; ; CHECK-V-LABEL: ustest_f64i16_mm: @@ -4531,57 +4411,49 @@ ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui 
a2, 16 -; CHECK-NOV-NEXT: addiw a5, a2, -1 +; CHECK-NOV-NEXT: addiw a4, a2, -1 ; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB41_10 +; CHECK-NOV-NEXT: bge a1, a4, .LBB41_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB41_11 +; CHECK-NOV-NEXT: bge a2, a4, .LBB41_7 ; CHECK-NOV-NEXT: .LBB41_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz -; CHECK-NOV-NEXT: bge a3, a5, .LBB41_12 +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: bge a3, a4, .LBB41_8 ; CHECK-NOV-NEXT: .LBB41_3: # %entry -; CHECK-NOV-NEXT: bge a4, a5, .LBB41_13 +; CHECK-NOV-NEXT: blt a5, a4, .LBB41_5 ; CHECK-NOV-NEXT: .LBB41_4: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB41_14 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB41_5: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB41_15 -; CHECK-NOV-NEXT: .LBB41_6: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB41_16 -; CHECK-NOV-NEXT: .LBB41_7: # %entry -; CHECK-NOV-NEXT: bgtz a1, .LBB41_9 -; CHECK-NOV-NEXT: .LBB41_8: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: .LBB41_9: # %entry +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a3, a5, a3 +; CHECK-NOV-NEXT: sgtz a5, a2 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a2, a5, a2 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: sh a2, 4(a0) ; CHECK-NOV-NEXT: sh a3, 2(a0) ; CHECK-NOV-NEXT: sh a4, 0(a0) ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB41_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: .LBB41_6: # %entry +; CHECK-NOV-NEXT: mv a1, a4 ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB41_2 -; CHECK-NOV-NEXT: .LBB41_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 -; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a5, .LBB41_3 -; CHECK-NOV-NEXT: .LBB41_12: # %entry -; CHECK-NOV-NEXT: mv a3, a5 -; CHECK-NOV-NEXT: blt a4, a5, .LBB41_4 -; CHECK-NOV-NEXT: .LBB41_13: # %entry -; CHECK-NOV-NEXT: mv a4, a5 -; CHECK-NOV-NEXT: bgtz a4, .LBB41_5 -; CHECK-NOV-NEXT: .LBB41_14: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB41_6 -; CHECK-NOV-NEXT: .LBB41_15: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB41_7 -; CHECK-NOV-NEXT: .LBB41_16: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: blez a1, .LBB41_8 -; CHECK-NOV-NEXT: j .LBB41_9 +; CHECK-NOV-NEXT: blt a2, a4, .LBB41_2 +; CHECK-NOV-NEXT: .LBB41_7: # %entry +; CHECK-NOV-NEXT: mv a2, a4 +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: blt a3, a4, .LBB41_3 +; CHECK-NOV-NEXT: .LBB41_8: # %entry +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB41_4 +; CHECK-NOV-NEXT: j .LBB41_5 ; ; CHECK-V-LABEL: ustest_f32i16_mm: ; CHECK-V: # %bb.0: # %entry @@ -5259,55 +5131,63 @@ ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a1, 16 -; CHECK-NOV-NEXT: addiw a7, a1, -1 -; CHECK-NOV-NEXT: bge a0, a7, .LBB44_18 +; CHECK-NOV-NEXT: addiw a3, a1, -1 +; CHECK-NOV-NEXT: bge a0, a3, .LBB44_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: bge s2, a7, .LBB44_19 +; CHECK-NOV-NEXT: bge s2, a3, .LBB44_11 ; CHECK-NOV-NEXT: .LBB44_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: bge a1, a7, .LBB44_20 +; CHECK-NOV-NEXT: bge a1, a3, .LBB44_12 ; 
CHECK-NOV-NEXT: .LBB44_3: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: bge a2, a7, .LBB44_21 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz +; CHECK-NOV-NEXT: bge a2, a3, .LBB44_13 ; CHECK-NOV-NEXT: .LBB44_4: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: bge a3, a7, .LBB44_22 +; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz +; CHECK-NOV-NEXT: bge a4, a3, .LBB44_14 ; CHECK-NOV-NEXT: .LBB44_5: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz -; CHECK-NOV-NEXT: bge a4, a7, .LBB44_23 +; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz +; CHECK-NOV-NEXT: bge a5, a3, .LBB44_15 ; CHECK-NOV-NEXT: .LBB44_6: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz -; CHECK-NOV-NEXT: bge a5, a7, .LBB44_24 +; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz +; CHECK-NOV-NEXT: bge a6, a3, .LBB44_16 ; CHECK-NOV-NEXT: .LBB44_7: # %entry -; CHECK-NOV-NEXT: bge a6, a7, .LBB44_25 +; CHECK-NOV-NEXT: blt a7, a3, .LBB44_9 ; CHECK-NOV-NEXT: .LBB44_8: # %entry -; CHECK-NOV-NEXT: blez a6, .LBB44_26 +; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB44_9: # %entry -; CHECK-NOV-NEXT: blez a5, .LBB44_27 -; CHECK-NOV-NEXT: .LBB44_10: # %entry -; CHECK-NOV-NEXT: blez a4, .LBB44_28 -; CHECK-NOV-NEXT: .LBB44_11: # %entry -; CHECK-NOV-NEXT: blez a3, .LBB44_29 -; CHECK-NOV-NEXT: .LBB44_12: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB44_30 -; CHECK-NOV-NEXT: .LBB44_13: # %entry -; CHECK-NOV-NEXT: blez a1, .LBB44_31 -; CHECK-NOV-NEXT: .LBB44_14: # %entry -; CHECK-NOV-NEXT: blez s2, .LBB44_32 -; CHECK-NOV-NEXT: .LBB44_15: # %entry -; CHECK-NOV-NEXT: bgtz a0, .LBB44_17 -; CHECK-NOV-NEXT: .LBB44_16: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: .LBB44_17: # %entry +; CHECK-NOV-NEXT: sgtz a3, a7 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a7 +; CHECK-NOV-NEXT: sgtz a7, a6 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a6, a7, a6 +; CHECK-NOV-NEXT: sgtz a7, a5 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a5, a7, a5 +; CHECK-NOV-NEXT: sgtz a7, a4 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a4, a7, a4 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: sgtz a7, a1 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a1, a7, a1 +; CHECK-NOV-NEXT: sgtz a7, s2 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a7, a7, s2 +; CHECK-NOV-NEXT: sgtz t0, a0 +; CHECK-NOV-NEXT: neg t0, t0 +; CHECK-NOV-NEXT: and a0, t0, a0 ; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh s2, 12(s0) +; CHECK-NOV-NEXT: sh a7, 12(s0) ; CHECK-NOV-NEXT: sh a1, 10(s0) ; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a4, 4(s0) -; CHECK-NOV-NEXT: sh a5, 2(s0) -; CHECK-NOV-NEXT: sh a6, 0(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a3, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -5326,58 +5206,34 @@ ; CHECK-NOV-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 128 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB44_18: # %entry -; CHECK-NOV-NEXT: mv a0, a7 +; CHECK-NOV-NEXT: .LBB44_10: # %entry +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: blt s2, a7, .LBB44_2 -; CHECK-NOV-NEXT: .LBB44_19: # %entry -; CHECK-NOV-NEXT: mv s2, a7 +; CHECK-NOV-NEXT: blt s2, a3, .LBB44_2 +; CHECK-NOV-NEXT: .LBB44_11: # %entry +; CHECK-NOV-NEXT: mv s2, a3 ; 
CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: blt a1, a7, .LBB44_3 -; CHECK-NOV-NEXT: .LBB44_20: # %entry -; CHECK-NOV-NEXT: mv a1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: blt a2, a7, .LBB44_4 -; CHECK-NOV-NEXT: .LBB44_21: # %entry -; CHECK-NOV-NEXT: mv a2, a7 -; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: blt a3, a7, .LBB44_5 -; CHECK-NOV-NEXT: .LBB44_22: # %entry -; CHECK-NOV-NEXT: mv a3, a7 -; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz -; CHECK-NOV-NEXT: blt a4, a7, .LBB44_6 -; CHECK-NOV-NEXT: .LBB44_23: # %entry -; CHECK-NOV-NEXT: mv a4, a7 -; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz -; CHECK-NOV-NEXT: blt a5, a7, .LBB44_7 -; CHECK-NOV-NEXT: .LBB44_24: # %entry -; CHECK-NOV-NEXT: mv a5, a7 -; CHECK-NOV-NEXT: blt a6, a7, .LBB44_8 -; CHECK-NOV-NEXT: .LBB44_25: # %entry -; CHECK-NOV-NEXT: mv a6, a7 -; CHECK-NOV-NEXT: bgtz a6, .LBB44_9 -; CHECK-NOV-NEXT: .LBB44_26: # %entry -; CHECK-NOV-NEXT: li a6, 0 -; CHECK-NOV-NEXT: bgtz a5, .LBB44_10 -; CHECK-NOV-NEXT: .LBB44_27: # %entry -; CHECK-NOV-NEXT: li a5, 0 -; CHECK-NOV-NEXT: bgtz a4, .LBB44_11 -; CHECK-NOV-NEXT: .LBB44_28: # %entry -; CHECK-NOV-NEXT: li a4, 0 -; CHECK-NOV-NEXT: bgtz a3, .LBB44_12 -; CHECK-NOV-NEXT: .LBB44_29: # %entry -; CHECK-NOV-NEXT: li a3, 0 -; CHECK-NOV-NEXT: bgtz a2, .LBB44_13 -; CHECK-NOV-NEXT: .LBB44_30: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bgtz a1, .LBB44_14 -; CHECK-NOV-NEXT: .LBB44_31: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgtz s2, .LBB44_15 -; CHECK-NOV-NEXT: .LBB44_32: # %entry -; CHECK-NOV-NEXT: li s2, 0 -; CHECK-NOV-NEXT: blez a0, .LBB44_16 -; CHECK-NOV-NEXT: j .LBB44_17 +; CHECK-NOV-NEXT: blt a1, a3, .LBB44_3 +; CHECK-NOV-NEXT: .LBB44_12: # %entry +; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz +; CHECK-NOV-NEXT: blt a2, a3, .LBB44_4 +; CHECK-NOV-NEXT: .LBB44_13: # %entry +; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz +; CHECK-NOV-NEXT: blt a4, a3, .LBB44_5 +; CHECK-NOV-NEXT: .LBB44_14: # %entry +; CHECK-NOV-NEXT: mv a4, a3 +; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz +; CHECK-NOV-NEXT: blt a5, a3, .LBB44_6 +; CHECK-NOV-NEXT: .LBB44_15: # %entry +; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz +; CHECK-NOV-NEXT: blt a6, a3, .LBB44_7 +; CHECK-NOV-NEXT: .LBB44_16: # %entry +; CHECK-NOV-NEXT: mv a6, a3 +; CHECK-NOV-NEXT: bge a7, a3, .LBB44_8 +; CHECK-NOV-NEXT: j .LBB44_9 ; ; CHECK-V-LABEL: ustest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry @@ -5525,42 +5381,44 @@ ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB45_17 +; CHECK-NOV-NEXT: bgez a1, .LBB45_15 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB45_18 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB45_16 ; CHECK-NOV-NEXT: .LBB45_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB45_19 +; CHECK-NOV-NEXT: bnez a1, .LBB45_17 ; CHECK-NOV-NEXT: .LBB45_3: # %entry ; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB45_20 +; CHECK-NOV-NEXT: bgez s1, .LBB45_18 ; CHECK-NOV-NEXT: .LBB45_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB45_21 +; CHECK-NOV-NEXT: bgeu s0, a3, .LBB45_19 ; CHECK-NOV-NEXT: .LBB45_5: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB45_22 +; CHECK-NOV-NEXT: beqz s1, .LBB45_7 ; CHECK-NOV-NEXT: .LBB45_6: # %entry -; CHECK-NOV-NEXT: bgez a1, .LBB45_23 +; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB45_7: # %entry -; CHECK-NOV-NEXT: bltz s1, .LBB45_9 -; CHECK-NOV-NEXT: .LBB45_8: # %entry -; CHECK-NOV-NEXT: li s1, 0 -; CHECK-NOV-NEXT: 
.LBB45_9: # %entry +; CHECK-NOV-NEXT: slti a6, a1, 0 +; CHECK-NOV-NEXT: slti a3, s1, 0 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a4, a3, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB45_24 -; CHECK-NOV-NEXT: # %bb.10: # %entry -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_25 +; CHECK-NOV-NEXT: mv a5, s0 +; CHECK-NOV-NEXT: bltz a4, .LBB45_20 +; CHECK-NOV-NEXT: # %bb.8: # %entry +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_21 +; CHECK-NOV-NEXT: .LBB45_9: # %entry +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB45_22 +; CHECK-NOV-NEXT: .LBB45_10: # %entry +; CHECK-NOV-NEXT: mv a4, a2 +; CHECK-NOV-NEXT: bltz a1, .LBB45_23 ; CHECK-NOV-NEXT: .LBB45_11: # %entry -; CHECK-NOV-NEXT: bne s1, a0, .LBB45_26 +; CHECK-NOV-NEXT: bgeu a3, a2, .LBB45_24 ; CHECK-NOV-NEXT: .LBB45_12: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB45_27 +; CHECK-NOV-NEXT: beq a1, a0, .LBB45_14 ; CHECK-NOV-NEXT: .LBB45_13: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB45_28 -; CHECK-NOV-NEXT: .LBB45_14: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB45_16 -; CHECK-NOV-NEXT: .LBB45_15: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB45_16: # %entry +; CHECK-NOV-NEXT: .LBB45_14: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -5569,46 +5427,42 @@ ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB45_17: # %entry +; CHECK-NOV-NEXT: .LBB45_15: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu a2, a3, .LBB45_2 -; CHECK-NOV-NEXT: .LBB45_18: # %entry +; CHECK-NOV-NEXT: .LBB45_16: # %entry ; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: beqz a1, .LBB45_3 -; CHECK-NOV-NEXT: .LBB45_19: # %entry +; CHECK-NOV-NEXT: .LBB45_17: # %entry ; CHECK-NOV-NEXT: mv a2, a4 ; CHECK-NOV-NEXT: mv a4, s0 ; CHECK-NOV-NEXT: bltz s1, .LBB45_4 -; CHECK-NOV-NEXT: .LBB45_20: # %entry +; CHECK-NOV-NEXT: .LBB45_18: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu s0, a3, .LBB45_5 +; CHECK-NOV-NEXT: .LBB45_19: # %entry +; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: bnez s1, .LBB45_6 +; CHECK-NOV-NEXT: j .LBB45_7 +; CHECK-NOV-NEXT: .LBB45_20: # %entry +; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_9 ; CHECK-NOV-NEXT: .LBB45_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB45_6 +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: beq a4, a0, .LBB45_10 ; CHECK-NOV-NEXT: .LBB45_22: # %entry -; CHECK-NOV-NEXT: mv s0, a4 -; CHECK-NOV-NEXT: bltz a1, .LBB45_7 -; CHECK-NOV-NEXT: .LBB45_23: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgez s1, .LBB45_8 -; CHECK-NOV-NEXT: j .LBB45_9 -; CHECK-NOV-NEXT: .LBB45_24: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_11 -; CHECK-NOV-NEXT: .LBB45_25: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beq s1, a0, .LBB45_12 -; CHECK-NOV-NEXT: .LBB45_26: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: mv s0, a5 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB45_13 -; CHECK-NOV-NEXT: .LBB45_27: # %entry +; CHECK-NOV-NEXT: bgez a1, .LBB45_11 +; CHECK-NOV-NEXT: .LBB45_23: # %entry ; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB45_14 -; CHECK-NOV-NEXT: .LBB45_28: # %entry +; CHECK-NOV-NEXT: bltu a3, a2, .LBB45_12 +; CHECK-NOV-NEXT: .LBB45_24: # %entry ; CHECK-NOV-NEXT: mv a2, 
a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB45_15 -; CHECK-NOV-NEXT: j .LBB45_16 +; CHECK-NOV-NEXT: bne a1, a0, .LBB45_13 +; CHECK-NOV-NEXT: j .LBB45_14 ; ; CHECK-V-LABEL: stest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -5639,42 +5493,44 @@ ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 ; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB45_17 +; CHECK-V-NEXT: bgez s1, .LBB45_15 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB45_18 +; CHECK-V-NEXT: bgeu s0, a3, .LBB45_16 ; CHECK-V-NEXT: .LBB45_2: # %entry -; CHECK-V-NEXT: bnez s1, .LBB45_19 +; CHECK-V-NEXT: bnez s1, .LBB45_17 ; CHECK-V-NEXT: .LBB45_3: # %entry ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB45_20 +; CHECK-V-NEXT: bgez a1, .LBB45_18 ; CHECK-V-NEXT: .LBB45_4: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB45_21 +; CHECK-V-NEXT: bgeu a0, a3, .LBB45_19 ; CHECK-V-NEXT: .LBB45_5: # %entry -; CHECK-V-NEXT: bnez a1, .LBB45_22 +; CHECK-V-NEXT: beqz a1, .LBB45_7 ; CHECK-V-NEXT: .LBB45_6: # %entry -; CHECK-V-NEXT: bgez a1, .LBB45_23 +; CHECK-V-NEXT: mv a0, a4 ; CHECK-V-NEXT: .LBB45_7: # %entry -; CHECK-V-NEXT: bltz s1, .LBB45_9 -; CHECK-V-NEXT: .LBB45_8: # %entry -; CHECK-V-NEXT: li s1, 0 -; CHECK-V-NEXT: .LBB45_9: # %entry +; CHECK-V-NEXT: slti a3, s1, 0 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a4, a3, s1 +; CHECK-V-NEXT: slti a6, a1, 0 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bltz s1, .LBB45_24 -; CHECK-V-NEXT: # %bb.10: # %entry -; CHECK-V-NEXT: bgeu a3, s0, .LBB45_25 +; CHECK-V-NEXT: mv a5, s0 +; CHECK-V-NEXT: bltz a4, .LBB45_20 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bgeu a3, s0, .LBB45_21 +; CHECK-V-NEXT: .LBB45_9: # %entry +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: bne a4, a2, .LBB45_22 +; CHECK-V-NEXT: .LBB45_10: # %entry +; CHECK-V-NEXT: mv a4, a0 +; CHECK-V-NEXT: bltz a1, .LBB45_23 ; CHECK-V-NEXT: .LBB45_11: # %entry -; CHECK-V-NEXT: bne s1, a2, .LBB45_26 +; CHECK-V-NEXT: bgeu a3, a0, .LBB45_24 ; CHECK-V-NEXT: .LBB45_12: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB45_27 +; CHECK-V-NEXT: beq a1, a2, .LBB45_14 ; CHECK-V-NEXT: .LBB45_13: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB45_28 -; CHECK-V-NEXT: .LBB45_14: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB45_16 -; CHECK-V-NEXT: .LBB45_15: # %entry ; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB45_16: # %entry +; CHECK-V-NEXT: .LBB45_14: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -5692,46 +5548,42 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB45_17: # %entry +; CHECK-V-NEXT: .LBB45_15: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu s0, a3, .LBB45_2 -; CHECK-V-NEXT: .LBB45_18: # %entry +; CHECK-V-NEXT: .LBB45_16: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: beqz s1, .LBB45_3 -; CHECK-V-NEXT: .LBB45_19: # %entry +; CHECK-V-NEXT: .LBB45_17: # %entry ; CHECK-V-NEXT: mv s0, a4 ; CHECK-V-NEXT: mv a4, a0 ; CHECK-V-NEXT: bltz a1, .LBB45_4 -; CHECK-V-NEXT: .LBB45_20: # %entry +; CHECK-V-NEXT: .LBB45_18: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu a0, a3, .LBB45_5 -; CHECK-V-NEXT: .LBB45_21: # %entry +; CHECK-V-NEXT: .LBB45_19: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: beqz a1, .LBB45_6 -; CHECK-V-NEXT: .LBB45_22: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: bltz a1, .LBB45_7 -; CHECK-V-NEXT: .LBB45_23: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: bgez 
s1, .LBB45_8 -; CHECK-V-NEXT: j .LBB45_9 -; CHECK-V-NEXT: .LBB45_24: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, s0, .LBB45_11 -; CHECK-V-NEXT: .LBB45_25: # %entry +; CHECK-V-NEXT: bnez a1, .LBB45_6 +; CHECK-V-NEXT: j .LBB45_7 +; CHECK-V-NEXT: .LBB45_20: # %entry +; CHECK-V-NEXT: mv a5, a3 +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bltu a3, s0, .LBB45_9 +; CHECK-V-NEXT: .LBB45_21: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beq s1, a2, .LBB45_12 -; CHECK-V-NEXT: .LBB45_26: # %entry -; CHECK-V-NEXT: mv s0, a4 +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: beq a4, a2, .LBB45_10 +; CHECK-V-NEXT: .LBB45_22: # %entry +; CHECK-V-NEXT: mv s0, a5 ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB45_13 -; CHECK-V-NEXT: .LBB45_27: # %entry +; CHECK-V-NEXT: bgez a1, .LBB45_11 +; CHECK-V-NEXT: .LBB45_23: # %entry ; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB45_14 -; CHECK-V-NEXT: .LBB45_28: # %entry +; CHECK-V-NEXT: bltu a3, a0, .LBB45_12 +; CHECK-V-NEXT: .LBB45_24: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bne a1, a2, .LBB45_15 -; CHECK-V-NEXT: j .LBB45_16 +; CHECK-V-NEXT: bne a1, a2, .LBB45_13 +; CHECK-V-NEXT: j .LBB45_14 entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -5760,36 +5612,26 @@ ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beqz a3, .LBB46_2 -; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: .LBB46_2: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: mv a0, a1 -; CHECK-NOV-NEXT: bne a3, a4, .LBB46_7 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB46_8 -; CHECK-NOV-NEXT: .LBB46_4: # %entry -; CHECK-NOV-NEXT: beq s1, a4, .LBB46_6 -; CHECK-NOV-NEXT: .LBB46_5: # %entry -; CHECK-NOV-NEXT: mv a1, s0 -; CHECK-NOV-NEXT: .LBB46_6: # %entry +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: addi a2, s1, -1 +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB46_7: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: beqz s1, .LBB46_4 -; CHECK-NOV-NEXT: .LBB46_8: # %entry -; CHECK-NOV-NEXT: mv s0, a1 -; CHECK-NOV-NEXT: bne s1, a4, .LBB46_5 -; CHECK-NOV-NEXT: j .LBB46_6 ; ; CHECK-V-LABEL: utest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -5817,23 +5659,22 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: li a2, 0 -; CHECK-V-NEXT: beqz s1, .LBB46_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: mv s0, a2 -; CHECK-V-NEXT: .LBB46_2: # %entry -; CHECK-V-NEXT: li a4, 1 -; CHECK-V-NEXT: mv a3, a2 -; CHECK-V-NEXT: bne s1, a4, .LBB46_7 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: bnez a1, 
.LBB46_8 -; CHECK-V-NEXT: .LBB46_4: # %entry -; CHECK-V-NEXT: beq a1, a4, .LBB46_6 -; CHECK-V-NEXT: .LBB46_5: # %entry -; CHECK-V-NEXT: mv a2, a0 -; CHECK-V-NEXT: .LBB46_6: # %entry -; CHECK-V-NEXT: sd a2, 24(sp) -; CHECK-V-NEXT: sd a3, 32(sp) +; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: addi a3, s1, -1 +; CHECK-V-NEXT: seqz a3, a3 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: snez a3, a1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -5849,13 +5690,6 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB46_7: # %entry -; CHECK-V-NEXT: mv a3, s0 -; CHECK-V-NEXT: beqz a1, .LBB46_4 -; CHECK-V-NEXT: .LBB46_8: # %entry -; CHECK-V-NEXT: mv a0, a2 -; CHECK-V-NEXT: bne a1, a4, .LBB46_5 -; CHECK-V-NEXT: j .LBB46_6 entry: %conv = fptoui <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -5883,74 +5717,47 @@ ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: li a5, 1 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB47_12 +; CHECK-NOV-NEXT: blez a1, .LBB47_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB47_13 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB47_2: # %entry -; CHECK-NOV-NEXT: bgtz a2, .LBB47_14 -; CHECK-NOV-NEXT: .LBB47_3: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bne a2, a5, .LBB47_15 +; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: blez s1, .LBB47_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB47_16 -; CHECK-NOV-NEXT: .LBB47_5: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bne s1, a5, .LBB47_17 +; CHECK-NOV-NEXT: sgtz a3, a1 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: addi a0, a1, -1 +; CHECK-NOV-NEXT: seqz a0, a0 +; CHECK-NOV-NEXT: addi a1, a0, -1 +; CHECK-NOV-NEXT: sgtz a0, s1 +; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi a5, s1, -1 +; CHECK-NOV-NEXT: seqz a5, a5 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 +; CHECK-NOV-NEXT: beqz a4, .LBB47_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a4, a4 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB47_6: # %entry -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: blez a4, .LBB47_18 -; CHECK-NOV-NEXT: .LBB47_7: # %entry -; CHECK-NOV-NEXT: bnez a4, .LBB47_19 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB47_8 +; CHECK-NOV-NEXT: # %bb.7: # %entry +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB47_8: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a3, .LBB47_20 -; CHECK-NOV-NEXT: .LBB47_9: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB47_11 -; CHECK-NOV-NEXT: .LBB47_10: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: .LBB47_11: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte 
Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB47_12: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: blez s1, .LBB47_2 -; CHECK-NOV-NEXT: .LBB47_13: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: blez a2, .LBB47_3 -; CHECK-NOV-NEXT: .LBB47_14: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beq a2, a5, .LBB47_4 -; CHECK-NOV-NEXT: .LBB47_15: # %entry -; CHECK-NOV-NEXT: mv a1, a0 -; CHECK-NOV-NEXT: blez s1, .LBB47_5 -; CHECK-NOV-NEXT: .LBB47_16: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: beq s1, a5, .LBB47_6 -; CHECK-NOV-NEXT: .LBB47_17: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: bgtz a4, .LBB47_7 -; CHECK-NOV-NEXT: .LBB47_18: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: beqz a4, .LBB47_8 -; CHECK-NOV-NEXT: .LBB47_19: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a3, .LBB47_9 -; CHECK-NOV-NEXT: .LBB47_20: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB47_10 -; CHECK-NOV-NEXT: j .LBB47_11 ; ; CHECK-V-LABEL: ustest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -5978,36 +5785,43 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: li a5, 1 ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB47_12 +; CHECK-V-NEXT: blez a1, .LBB47_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgtz s0, .LBB47_13 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: bne s0, a5, .LBB47_14 -; CHECK-V-NEXT: .LBB47_3: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB47_15 +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: addi a4, s0, -1 +; CHECK-V-NEXT: seqz a4, a4 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: sgtz a5, a1 +; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a6, a1 +; CHECK-V-NEXT: blez s0, .LBB47_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB47_4: # %entry -; CHECK-V-NEXT: li a4, 0 -; CHECK-V-NEXT: bne a1, a5, .LBB47_16 -; CHECK-V-NEXT: .LBB47_5: # %entry -; CHECK-V-NEXT: bgtz s0, .LBB47_17 +; CHECK-V-NEXT: and a1, a5, a0 +; CHECK-V-NEXT: addi a5, a6, -1 +; CHECK-V-NEXT: and a0, a4, a3 +; CHECK-V-NEXT: beqz s0, .LBB47_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a0, a3, a0 ; CHECK-V-NEXT: .LBB47_6: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: blez s0, .LBB47_18 -; CHECK-V-NEXT: .LBB47_7: # %entry -; CHECK-V-NEXT: bnez s0, .LBB47_19 +; CHECK-V-NEXT: and a1, a5, a1 +; CHECK-V-NEXT: beqz a2, .LBB47_8 +; CHECK-V-NEXT: # %bb.7: # %entry +; CHECK-V-NEXT: sgtz a2, a2 +; CHECK-V-NEXT: neg a2, a2 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB47_8: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: blez a2, .LBB47_20 -; CHECK-V-NEXT: .LBB47_9: # %entry -; CHECK-V-NEXT: beqz a2, .LBB47_11 -; CHECK-V-NEXT: .LBB47_10: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: .LBB47_11: # %entry -; CHECK-V-NEXT: sd a4, 24(sp) -; CHECK-V-NEXT: sd a3, 32(sp) +; CHECK-V-NEXT: sd a1, 24(sp) +; CHECK-V-NEXT: sd a0, 
32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -6023,38 +5837,6 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB47_12: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: blez s0, .LBB47_2 -; CHECK-V-NEXT: .LBB47_13: # %entry -; CHECK-V-NEXT: li s1, 0 -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: beq s0, a5, .LBB47_3 -; CHECK-V-NEXT: .LBB47_14: # %entry -; CHECK-V-NEXT: mv a3, s1 -; CHECK-V-NEXT: blez a1, .LBB47_4 -; CHECK-V-NEXT: .LBB47_15: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: li a4, 0 -; CHECK-V-NEXT: beq a1, a5, .LBB47_5 -; CHECK-V-NEXT: .LBB47_16: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: blez s0, .LBB47_6 -; CHECK-V-NEXT: .LBB47_17: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bgtz s0, .LBB47_7 -; CHECK-V-NEXT: .LBB47_18: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: beqz s0, .LBB47_8 -; CHECK-V-NEXT: .LBB47_19: # %entry -; CHECK-V-NEXT: mv a3, a0 -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: bgtz a2, .LBB47_9 -; CHECK-V-NEXT: .LBB47_20: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: bnez a2, .LBB47_10 -; CHECK-V-NEXT: j .LBB47_11 entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6086,42 +5868,44 @@ ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB48_17 +; CHECK-NOV-NEXT: bgez a1, .LBB48_15 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB48_18 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB48_16 ; CHECK-NOV-NEXT: .LBB48_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB48_19 +; CHECK-NOV-NEXT: bnez a1, .LBB48_17 ; CHECK-NOV-NEXT: .LBB48_3: # %entry ; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB48_20 +; CHECK-NOV-NEXT: bgez s1, .LBB48_18 ; CHECK-NOV-NEXT: .LBB48_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB48_21 +; CHECK-NOV-NEXT: bgeu s0, a3, .LBB48_19 ; CHECK-NOV-NEXT: .LBB48_5: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB48_22 +; CHECK-NOV-NEXT: beqz s1, .LBB48_7 ; CHECK-NOV-NEXT: .LBB48_6: # %entry -; CHECK-NOV-NEXT: bgez a1, .LBB48_23 +; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB48_7: # %entry -; CHECK-NOV-NEXT: bltz s1, .LBB48_9 -; CHECK-NOV-NEXT: .LBB48_8: # %entry -; CHECK-NOV-NEXT: li s1, 0 -; CHECK-NOV-NEXT: .LBB48_9: # %entry +; CHECK-NOV-NEXT: slti a6, a1, 0 +; CHECK-NOV-NEXT: slti a3, s1, 0 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a4, a3, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB48_24 -; CHECK-NOV-NEXT: # %bb.10: # %entry -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_25 +; CHECK-NOV-NEXT: mv a5, s0 +; CHECK-NOV-NEXT: bltz a4, .LBB48_20 +; CHECK-NOV-NEXT: # %bb.8: # %entry +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_21 +; CHECK-NOV-NEXT: .LBB48_9: # %entry +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB48_22 +; CHECK-NOV-NEXT: .LBB48_10: # %entry +; CHECK-NOV-NEXT: mv a4, a2 +; CHECK-NOV-NEXT: bltz a1, .LBB48_23 ; CHECK-NOV-NEXT: .LBB48_11: # %entry -; CHECK-NOV-NEXT: bne s1, a0, .LBB48_26 +; CHECK-NOV-NEXT: bgeu a3, a2, .LBB48_24 ; CHECK-NOV-NEXT: .LBB48_12: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB48_27 +; CHECK-NOV-NEXT: beq a1, a0, .LBB48_14 ; CHECK-NOV-NEXT: .LBB48_13: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB48_28 -; 
CHECK-NOV-NEXT: .LBB48_14: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB48_16 -; CHECK-NOV-NEXT: .LBB48_15: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB48_16: # %entry +; CHECK-NOV-NEXT: .LBB48_14: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -6130,46 +5914,42 @@ ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB48_17: # %entry +; CHECK-NOV-NEXT: .LBB48_15: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu a2, a3, .LBB48_2 -; CHECK-NOV-NEXT: .LBB48_18: # %entry +; CHECK-NOV-NEXT: .LBB48_16: # %entry ; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: beqz a1, .LBB48_3 -; CHECK-NOV-NEXT: .LBB48_19: # %entry +; CHECK-NOV-NEXT: .LBB48_17: # %entry ; CHECK-NOV-NEXT: mv a2, a4 ; CHECK-NOV-NEXT: mv a4, s0 ; CHECK-NOV-NEXT: bltz s1, .LBB48_4 -; CHECK-NOV-NEXT: .LBB48_20: # %entry +; CHECK-NOV-NEXT: .LBB48_18: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu s0, a3, .LBB48_5 +; CHECK-NOV-NEXT: .LBB48_19: # %entry +; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: bnez s1, .LBB48_6 +; CHECK-NOV-NEXT: j .LBB48_7 +; CHECK-NOV-NEXT: .LBB48_20: # %entry +; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_9 ; CHECK-NOV-NEXT: .LBB48_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB48_6 +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: beq a4, a0, .LBB48_10 ; CHECK-NOV-NEXT: .LBB48_22: # %entry -; CHECK-NOV-NEXT: mv s0, a4 -; CHECK-NOV-NEXT: bltz a1, .LBB48_7 -; CHECK-NOV-NEXT: .LBB48_23: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgez s1, .LBB48_8 -; CHECK-NOV-NEXT: j .LBB48_9 -; CHECK-NOV-NEXT: .LBB48_24: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_11 -; CHECK-NOV-NEXT: .LBB48_25: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beq s1, a0, .LBB48_12 -; CHECK-NOV-NEXT: .LBB48_26: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: mv s0, a5 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB48_13 -; CHECK-NOV-NEXT: .LBB48_27: # %entry +; CHECK-NOV-NEXT: bgez a1, .LBB48_11 +; CHECK-NOV-NEXT: .LBB48_23: # %entry ; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB48_14 -; CHECK-NOV-NEXT: .LBB48_28: # %entry +; CHECK-NOV-NEXT: bltu a3, a2, .LBB48_12 +; CHECK-NOV-NEXT: .LBB48_24: # %entry ; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB48_15 -; CHECK-NOV-NEXT: j .LBB48_16 +; CHECK-NOV-NEXT: bne a1, a0, .LBB48_13 +; CHECK-NOV-NEXT: j .LBB48_14 ; ; CHECK-V-LABEL: stest_f32i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6200,42 +5980,44 @@ ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 ; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB48_17 +; CHECK-V-NEXT: bgez s1, .LBB48_15 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB48_18 +; CHECK-V-NEXT: bgeu s0, a3, .LBB48_16 ; CHECK-V-NEXT: .LBB48_2: # %entry -; CHECK-V-NEXT: bnez s1, .LBB48_19 +; CHECK-V-NEXT: bnez s1, .LBB48_17 ; CHECK-V-NEXT: .LBB48_3: # %entry ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB48_20 +; CHECK-V-NEXT: bgez a1, .LBB48_18 ; CHECK-V-NEXT: .LBB48_4: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB48_21 +; CHECK-V-NEXT: bgeu a0, a3, .LBB48_19 ; CHECK-V-NEXT: .LBB48_5: # %entry -; CHECK-V-NEXT: bnez a1, .LBB48_22 +; CHECK-V-NEXT: beqz a1, .LBB48_7 ; CHECK-V-NEXT: .LBB48_6: # %entry -; CHECK-V-NEXT: bgez a1, .LBB48_23 +; CHECK-V-NEXT: mv a0, a4 ; 
CHECK-V-NEXT: .LBB48_7: # %entry -; CHECK-V-NEXT: bltz s1, .LBB48_9 -; CHECK-V-NEXT: .LBB48_8: # %entry -; CHECK-V-NEXT: li s1, 0 -; CHECK-V-NEXT: .LBB48_9: # %entry +; CHECK-V-NEXT: slti a3, s1, 0 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a4, a3, s1 +; CHECK-V-NEXT: slti a6, a1, 0 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bltz s1, .LBB48_24 -; CHECK-V-NEXT: # %bb.10: # %entry -; CHECK-V-NEXT: bgeu a3, s0, .LBB48_25 +; CHECK-V-NEXT: mv a5, s0 +; CHECK-V-NEXT: bltz a4, .LBB48_20 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bgeu a3, s0, .LBB48_21 +; CHECK-V-NEXT: .LBB48_9: # %entry +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: bne a4, a2, .LBB48_22 +; CHECK-V-NEXT: .LBB48_10: # %entry +; CHECK-V-NEXT: mv a4, a0 +; CHECK-V-NEXT: bltz a1, .LBB48_23 ; CHECK-V-NEXT: .LBB48_11: # %entry -; CHECK-V-NEXT: bne s1, a2, .LBB48_26 +; CHECK-V-NEXT: bgeu a3, a0, .LBB48_24 ; CHECK-V-NEXT: .LBB48_12: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB48_27 +; CHECK-V-NEXT: beq a1, a2, .LBB48_14 ; CHECK-V-NEXT: .LBB48_13: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB48_28 -; CHECK-V-NEXT: .LBB48_14: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB48_16 -; CHECK-V-NEXT: .LBB48_15: # %entry ; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB48_16: # %entry +; CHECK-V-NEXT: .LBB48_14: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -6253,46 +6035,42 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB48_17: # %entry +; CHECK-V-NEXT: .LBB48_15: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu s0, a3, .LBB48_2 -; CHECK-V-NEXT: .LBB48_18: # %entry +; CHECK-V-NEXT: .LBB48_16: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: beqz s1, .LBB48_3 -; CHECK-V-NEXT: .LBB48_19: # %entry +; CHECK-V-NEXT: .LBB48_17: # %entry ; CHECK-V-NEXT: mv s0, a4 ; CHECK-V-NEXT: mv a4, a0 ; CHECK-V-NEXT: bltz a1, .LBB48_4 -; CHECK-V-NEXT: .LBB48_20: # %entry +; CHECK-V-NEXT: .LBB48_18: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu a0, a3, .LBB48_5 -; CHECK-V-NEXT: .LBB48_21: # %entry +; CHECK-V-NEXT: .LBB48_19: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: beqz a1, .LBB48_6 -; CHECK-V-NEXT: .LBB48_22: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: bltz a1, .LBB48_7 -; CHECK-V-NEXT: .LBB48_23: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: bgez s1, .LBB48_8 -; CHECK-V-NEXT: j .LBB48_9 -; CHECK-V-NEXT: .LBB48_24: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, s0, .LBB48_11 -; CHECK-V-NEXT: .LBB48_25: # %entry +; CHECK-V-NEXT: bnez a1, .LBB48_6 +; CHECK-V-NEXT: j .LBB48_7 +; CHECK-V-NEXT: .LBB48_20: # %entry +; CHECK-V-NEXT: mv a5, a3 +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bltu a3, s0, .LBB48_9 +; CHECK-V-NEXT: .LBB48_21: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beq s1, a2, .LBB48_12 -; CHECK-V-NEXT: .LBB48_26: # %entry -; CHECK-V-NEXT: mv s0, a4 +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: beq a4, a2, .LBB48_10 +; CHECK-V-NEXT: .LBB48_22: # %entry +; CHECK-V-NEXT: mv s0, a5 ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB48_13 -; CHECK-V-NEXT: .LBB48_27: # %entry +; CHECK-V-NEXT: bgez a1, .LBB48_11 +; CHECK-V-NEXT: .LBB48_23: # %entry ; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB48_14 -; CHECK-V-NEXT: .LBB48_28: # %entry +; CHECK-V-NEXT: bltu a3, a0, .LBB48_12 +; CHECK-V-NEXT: .LBB48_24: # %entry ; CHECK-V-NEXT: mv a0, a3 -; 
CHECK-V-NEXT: bne a1, a2, .LBB48_15 -; CHECK-V-NEXT: j .LBB48_16 +; CHECK-V-NEXT: bne a1, a2, .LBB48_13 +; CHECK-V-NEXT: j .LBB48_14 entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6321,36 +6099,26 @@ ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beqz a3, .LBB49_2 -; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: .LBB49_2: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: mv a0, a1 -; CHECK-NOV-NEXT: bne a3, a4, .LBB49_7 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB49_8 -; CHECK-NOV-NEXT: .LBB49_4: # %entry -; CHECK-NOV-NEXT: beq s1, a4, .LBB49_6 -; CHECK-NOV-NEXT: .LBB49_5: # %entry -; CHECK-NOV-NEXT: mv a1, s0 -; CHECK-NOV-NEXT: .LBB49_6: # %entry +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: addi a2, s1, -1 +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB49_7: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: beqz s1, .LBB49_4 -; CHECK-NOV-NEXT: .LBB49_8: # %entry -; CHECK-NOV-NEXT: mv s0, a1 -; CHECK-NOV-NEXT: bne s1, a4, .LBB49_5 -; CHECK-NOV-NEXT: j .LBB49_6 ; ; CHECK-V-LABEL: utest_f32i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6378,23 +6146,22 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: li a2, 0 -; CHECK-V-NEXT: beqz s1, .LBB49_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: mv s0, a2 -; CHECK-V-NEXT: .LBB49_2: # %entry -; CHECK-V-NEXT: li a4, 1 -; CHECK-V-NEXT: mv a3, a2 -; CHECK-V-NEXT: bne s1, a4, .LBB49_7 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: bnez a1, .LBB49_8 -; CHECK-V-NEXT: .LBB49_4: # %entry -; CHECK-V-NEXT: beq a1, a4, .LBB49_6 -; CHECK-V-NEXT: .LBB49_5: # %entry -; CHECK-V-NEXT: mv a2, a0 -; CHECK-V-NEXT: .LBB49_6: # %entry -; CHECK-V-NEXT: sd a2, 24(sp) -; CHECK-V-NEXT: sd a3, 32(sp) +; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: addi a3, s1, -1 +; CHECK-V-NEXT: seqz a3, a3 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: snez a3, a1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: sd a2, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -6410,13 +6177,6 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB49_7: # %entry -; CHECK-V-NEXT: mv a3, s0 -; 
CHECK-V-NEXT: beqz a1, .LBB49_4 -; CHECK-V-NEXT: .LBB49_8: # %entry -; CHECK-V-NEXT: mv a0, a2 -; CHECK-V-NEXT: bne a1, a4, .LBB49_5 -; CHECK-V-NEXT: j .LBB49_6 entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6444,74 +6204,47 @@ ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: li a5, 1 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB50_12 +; CHECK-NOV-NEXT: blez a1, .LBB50_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB50_13 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB50_2: # %entry -; CHECK-NOV-NEXT: bgtz a2, .LBB50_14 -; CHECK-NOV-NEXT: .LBB50_3: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bne a2, a5, .LBB50_15 +; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: blez s1, .LBB50_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB50_16 -; CHECK-NOV-NEXT: .LBB50_5: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bne s1, a5, .LBB50_17 +; CHECK-NOV-NEXT: sgtz a3, a1 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: addi a0, a1, -1 +; CHECK-NOV-NEXT: seqz a0, a0 +; CHECK-NOV-NEXT: addi a1, a0, -1 +; CHECK-NOV-NEXT: sgtz a0, s1 +; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi a5, s1, -1 +; CHECK-NOV-NEXT: seqz a5, a5 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 +; CHECK-NOV-NEXT: beqz a4, .LBB50_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a4, a4 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB50_6: # %entry -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: blez a4, .LBB50_18 -; CHECK-NOV-NEXT: .LBB50_7: # %entry -; CHECK-NOV-NEXT: bnez a4, .LBB50_19 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB50_8 +; CHECK-NOV-NEXT: # %bb.7: # %entry +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB50_8: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a3, .LBB50_20 -; CHECK-NOV-NEXT: .LBB50_9: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB50_11 -; CHECK-NOV-NEXT: .LBB50_10: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: .LBB50_11: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB50_12: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: blez s1, .LBB50_2 -; CHECK-NOV-NEXT: .LBB50_13: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: blez a2, .LBB50_3 -; CHECK-NOV-NEXT: .LBB50_14: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beq a2, a5, .LBB50_4 -; CHECK-NOV-NEXT: .LBB50_15: # %entry -; CHECK-NOV-NEXT: mv a1, a0 -; CHECK-NOV-NEXT: blez s1, .LBB50_5 -; CHECK-NOV-NEXT: .LBB50_16: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: beq s1, a5, .LBB50_6 -; CHECK-NOV-NEXT: .LBB50_17: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: bgtz a4, .LBB50_7 -; CHECK-NOV-NEXT: .LBB50_18: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; 
CHECK-NOV-NEXT: beqz a4, .LBB50_8 -; CHECK-NOV-NEXT: .LBB50_19: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a3, .LBB50_9 -; CHECK-NOV-NEXT: .LBB50_20: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB50_10 -; CHECK-NOV-NEXT: j .LBB50_11 ; ; CHECK-V-LABEL: ustest_f32i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6539,36 +6272,43 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: li a5, 1 ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB50_12 +; CHECK-V-NEXT: blez a1, .LBB50_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgtz s0, .LBB50_13 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: bne s0, a5, .LBB50_14 -; CHECK-V-NEXT: .LBB50_3: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB50_15 +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: addi a4, s0, -1 +; CHECK-V-NEXT: seqz a4, a4 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: sgtz a5, a1 +; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a6, a1 +; CHECK-V-NEXT: blez s0, .LBB50_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB50_4: # %entry -; CHECK-V-NEXT: li a4, 0 -; CHECK-V-NEXT: bne a1, a5, .LBB50_16 -; CHECK-V-NEXT: .LBB50_5: # %entry -; CHECK-V-NEXT: bgtz s0, .LBB50_17 +; CHECK-V-NEXT: and a1, a5, a0 +; CHECK-V-NEXT: addi a5, a6, -1 +; CHECK-V-NEXT: and a0, a4, a3 +; CHECK-V-NEXT: beqz s0, .LBB50_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a0, a3, a0 ; CHECK-V-NEXT: .LBB50_6: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: blez s0, .LBB50_18 -; CHECK-V-NEXT: .LBB50_7: # %entry -; CHECK-V-NEXT: bnez s0, .LBB50_19 +; CHECK-V-NEXT: and a1, a5, a1 +; CHECK-V-NEXT: beqz a2, .LBB50_8 +; CHECK-V-NEXT: # %bb.7: # %entry +; CHECK-V-NEXT: sgtz a2, a2 +; CHECK-V-NEXT: neg a2, a2 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB50_8: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: blez a2, .LBB50_20 -; CHECK-V-NEXT: .LBB50_9: # %entry -; CHECK-V-NEXT: beqz a2, .LBB50_11 -; CHECK-V-NEXT: .LBB50_10: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: .LBB50_11: # %entry -; CHECK-V-NEXT: sd a4, 24(sp) -; CHECK-V-NEXT: sd a3, 32(sp) +; CHECK-V-NEXT: sd a1, 24(sp) +; CHECK-V-NEXT: sd a0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -6584,38 +6324,6 @@ ; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB50_12: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: blez s0, .LBB50_2 -; CHECK-V-NEXT: .LBB50_13: # %entry -; CHECK-V-NEXT: li s1, 0 -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: beq s0, a5, .LBB50_3 -; CHECK-V-NEXT: .LBB50_14: # %entry -; CHECK-V-NEXT: mv a3, s1 -; CHECK-V-NEXT: blez a1, .LBB50_4 -; CHECK-V-NEXT: .LBB50_15: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: li a4, 0 -; CHECK-V-NEXT: beq a1, a5, .LBB50_5 -; CHECK-V-NEXT: .LBB50_16: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: blez s0, .LBB50_6 -; CHECK-V-NEXT: .LBB50_17: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bgtz s0, .LBB50_7 -; CHECK-V-NEXT: .LBB50_18: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: beqz s0, .LBB50_8 -; CHECK-V-NEXT: .LBB50_19: # 
%entry -; CHECK-V-NEXT: mv a3, a0 -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: bgtz a2, .LBB50_9 -; CHECK-V-NEXT: .LBB50_20: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: bnez a2, .LBB50_10 -; CHECK-V-NEXT: j .LBB50_11 entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6649,42 +6357,44 @@ ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB51_17 +; CHECK-NOV-NEXT: bgez a1, .LBB51_15 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB51_18 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB51_16 ; CHECK-NOV-NEXT: .LBB51_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB51_19 +; CHECK-NOV-NEXT: bnez a1, .LBB51_17 ; CHECK-NOV-NEXT: .LBB51_3: # %entry ; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB51_20 +; CHECK-NOV-NEXT: bgez s1, .LBB51_18 ; CHECK-NOV-NEXT: .LBB51_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB51_21 +; CHECK-NOV-NEXT: bgeu s0, a3, .LBB51_19 ; CHECK-NOV-NEXT: .LBB51_5: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB51_22 +; CHECK-NOV-NEXT: beqz s1, .LBB51_7 ; CHECK-NOV-NEXT: .LBB51_6: # %entry -; CHECK-NOV-NEXT: bgez a1, .LBB51_23 +; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB51_7: # %entry -; CHECK-NOV-NEXT: bltz s1, .LBB51_9 -; CHECK-NOV-NEXT: .LBB51_8: # %entry -; CHECK-NOV-NEXT: li s1, 0 -; CHECK-NOV-NEXT: .LBB51_9: # %entry +; CHECK-NOV-NEXT: slti a6, a1, 0 +; CHECK-NOV-NEXT: slti a3, s1, 0 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a4, a3, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB51_24 -; CHECK-NOV-NEXT: # %bb.10: # %entry -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_25 +; CHECK-NOV-NEXT: mv a5, s0 +; CHECK-NOV-NEXT: bltz a4, .LBB51_20 +; CHECK-NOV-NEXT: # %bb.8: # %entry +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_21 +; CHECK-NOV-NEXT: .LBB51_9: # %entry +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: bne a4, a0, .LBB51_22 +; CHECK-NOV-NEXT: .LBB51_10: # %entry +; CHECK-NOV-NEXT: mv a4, a2 +; CHECK-NOV-NEXT: bltz a1, .LBB51_23 ; CHECK-NOV-NEXT: .LBB51_11: # %entry -; CHECK-NOV-NEXT: bne s1, a0, .LBB51_26 +; CHECK-NOV-NEXT: bgeu a3, a2, .LBB51_24 ; CHECK-NOV-NEXT: .LBB51_12: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB51_27 +; CHECK-NOV-NEXT: beq a1, a0, .LBB51_14 ; CHECK-NOV-NEXT: .LBB51_13: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB51_28 -; CHECK-NOV-NEXT: .LBB51_14: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB51_16 -; CHECK-NOV-NEXT: .LBB51_15: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB51_16: # %entry +; CHECK-NOV-NEXT: .LBB51_14: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -6693,46 +6403,42 @@ ; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB51_17: # %entry +; CHECK-NOV-NEXT: .LBB51_15: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu a2, a3, .LBB51_2 -; CHECK-NOV-NEXT: .LBB51_18: # %entry +; CHECK-NOV-NEXT: .LBB51_16: # %entry ; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: beqz a1, .LBB51_3 -; CHECK-NOV-NEXT: .LBB51_19: # %entry +; CHECK-NOV-NEXT: .LBB51_17: # %entry ; CHECK-NOV-NEXT: mv a2, a4 ; CHECK-NOV-NEXT: mv a4, s0 ; CHECK-NOV-NEXT: bltz s1, .LBB51_4 -; CHECK-NOV-NEXT: .LBB51_20: # %entry +; CHECK-NOV-NEXT: .LBB51_18: # %entry ; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: bltu s0, 
a3, .LBB51_5 +; CHECK-NOV-NEXT: .LBB51_19: # %entry +; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: bnez s1, .LBB51_6 +; CHECK-NOV-NEXT: j .LBB51_7 +; CHECK-NOV-NEXT: .LBB51_20: # %entry +; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_9 ; CHECK-NOV-NEXT: .LBB51_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB51_6 +; CHECK-NOV-NEXT: and a1, a6, a1 +; CHECK-NOV-NEXT: beq a4, a0, .LBB51_10 ; CHECK-NOV-NEXT: .LBB51_22: # %entry -; CHECK-NOV-NEXT: mv s0, a4 -; CHECK-NOV-NEXT: bltz a1, .LBB51_7 -; CHECK-NOV-NEXT: .LBB51_23: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bgez s1, .LBB51_8 -; CHECK-NOV-NEXT: j .LBB51_9 -; CHECK-NOV-NEXT: .LBB51_24: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_11 -; CHECK-NOV-NEXT: .LBB51_25: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: beq s1, a0, .LBB51_12 -; CHECK-NOV-NEXT: .LBB51_26: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: mv s0, a5 ; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB51_13 -; CHECK-NOV-NEXT: .LBB51_27: # %entry +; CHECK-NOV-NEXT: bgez a1, .LBB51_11 +; CHECK-NOV-NEXT: .LBB51_23: # %entry ; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB51_14 -; CHECK-NOV-NEXT: .LBB51_28: # %entry +; CHECK-NOV-NEXT: bltu a3, a2, .LBB51_12 +; CHECK-NOV-NEXT: .LBB51_24: # %entry ; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB51_15 -; CHECK-NOV-NEXT: j .LBB51_16 +; CHECK-NOV-NEXT: bne a1, a0, .LBB51_13 +; CHECK-NOV-NEXT: j .LBB51_14 ; ; CHECK-V-LABEL: stest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6757,42 +6463,44 @@ ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB51_17 +; CHECK-V-NEXT: bgez a1, .LBB51_15 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB51_18 +; CHECK-V-NEXT: bgeu a0, a3, .LBB51_16 ; CHECK-V-NEXT: .LBB51_2: # %entry -; CHECK-V-NEXT: bnez a1, .LBB51_19 +; CHECK-V-NEXT: bnez a1, .LBB51_17 ; CHECK-V-NEXT: .LBB51_3: # %entry ; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB51_20 +; CHECK-V-NEXT: bgez s1, .LBB51_18 ; CHECK-V-NEXT: .LBB51_4: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB51_21 +; CHECK-V-NEXT: bgeu s0, a3, .LBB51_19 ; CHECK-V-NEXT: .LBB51_5: # %entry -; CHECK-V-NEXT: bnez s1, .LBB51_22 +; CHECK-V-NEXT: beqz s1, .LBB51_7 ; CHECK-V-NEXT: .LBB51_6: # %entry -; CHECK-V-NEXT: bgez a1, .LBB51_23 +; CHECK-V-NEXT: mv s0, a4 ; CHECK-V-NEXT: .LBB51_7: # %entry -; CHECK-V-NEXT: bltz s1, .LBB51_9 -; CHECK-V-NEXT: .LBB51_8: # %entry -; CHECK-V-NEXT: li s1, 0 -; CHECK-V-NEXT: .LBB51_9: # %entry +; CHECK-V-NEXT: slti a6, a1, 0 +; CHECK-V-NEXT: slti a3, s1, 0 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a4, a3, s1 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bltz s1, .LBB51_24 -; CHECK-V-NEXT: # %bb.10: # %entry -; CHECK-V-NEXT: bgeu a3, s0, .LBB51_25 +; CHECK-V-NEXT: mv a5, s0 +; CHECK-V-NEXT: bltz a4, .LBB51_20 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bgeu a3, s0, .LBB51_21 +; CHECK-V-NEXT: .LBB51_9: # %entry +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: bne a4, a2, .LBB51_22 +; CHECK-V-NEXT: .LBB51_10: # %entry +; CHECK-V-NEXT: mv a4, a0 +; CHECK-V-NEXT: bltz a1, .LBB51_23 ; CHECK-V-NEXT: .LBB51_11: # %entry -; CHECK-V-NEXT: bne s1, a2, .LBB51_26 +; CHECK-V-NEXT: bgeu a3, a0, .LBB51_24 ; CHECK-V-NEXT: .LBB51_12: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB51_27 +; CHECK-V-NEXT: beq a1, 
a2, .LBB51_14 ; CHECK-V-NEXT: .LBB51_13: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB51_28 -; CHECK-V-NEXT: .LBB51_14: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB51_16 -; CHECK-V-NEXT: .LBB51_15: # %entry ; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB51_16: # %entry +; CHECK-V-NEXT: .LBB51_14: # %entry ; CHECK-V-NEXT: sd a0, 8(sp) ; CHECK-V-NEXT: sd s0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 @@ -6808,46 +6516,42 @@ ; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB51_17: # %entry +; CHECK-V-NEXT: .LBB51_15: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu a0, a3, .LBB51_2 -; CHECK-V-NEXT: .LBB51_18: # %entry +; CHECK-V-NEXT: .LBB51_16: # %entry ; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: beqz a1, .LBB51_3 -; CHECK-V-NEXT: .LBB51_19: # %entry +; CHECK-V-NEXT: .LBB51_17: # %entry ; CHECK-V-NEXT: mv a0, a4 ; CHECK-V-NEXT: mv a4, s0 ; CHECK-V-NEXT: bltz s1, .LBB51_4 -; CHECK-V-NEXT: .LBB51_20: # %entry +; CHECK-V-NEXT: .LBB51_18: # %entry ; CHECK-V-NEXT: mv a4, a3 ; CHECK-V-NEXT: bltu s0, a3, .LBB51_5 +; CHECK-V-NEXT: .LBB51_19: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: bnez s1, .LBB51_6 +; CHECK-V-NEXT: j .LBB51_7 +; CHECK-V-NEXT: .LBB51_20: # %entry +; CHECK-V-NEXT: mv a5, a3 +; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: bltu a3, s0, .LBB51_9 ; CHECK-V-NEXT: .LBB51_21: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beqz s1, .LBB51_6 +; CHECK-V-NEXT: and a1, a6, a1 +; CHECK-V-NEXT: beq a4, a2, .LBB51_10 ; CHECK-V-NEXT: .LBB51_22: # %entry -; CHECK-V-NEXT: mv s0, a4 -; CHECK-V-NEXT: bltz a1, .LBB51_7 -; CHECK-V-NEXT: .LBB51_23: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: bgez s1, .LBB51_8 -; CHECK-V-NEXT: j .LBB51_9 -; CHECK-V-NEXT: .LBB51_24: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, s0, .LBB51_11 -; CHECK-V-NEXT: .LBB51_25: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beq s1, a2, .LBB51_12 -; CHECK-V-NEXT: .LBB51_26: # %entry -; CHECK-V-NEXT: mv s0, a4 +; CHECK-V-NEXT: mv s0, a5 ; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB51_13 -; CHECK-V-NEXT: .LBB51_27: # %entry +; CHECK-V-NEXT: bgez a1, .LBB51_11 +; CHECK-V-NEXT: .LBB51_23: # %entry ; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB51_14 -; CHECK-V-NEXT: .LBB51_28: # %entry +; CHECK-V-NEXT: bltu a3, a0, .LBB51_12 +; CHECK-V-NEXT: .LBB51_24: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bne a1, a2, .LBB51_15 -; CHECK-V-NEXT: j .LBB51_16 +; CHECK-V-NEXT: bne a1, a2, .LBB51_13 +; CHECK-V-NEXT: j .LBB51_14 entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6869,45 +6573,35 @@ ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a0 +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: mv s1, a1 -; CHECK-NOV-NEXT: mv a0, s2 +; CHECK-NOV-NEXT: mv s1, a0 +; CHECK-NOV-NEXT: mv s2, a1 +; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beqz a3, .LBB52_2 -; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: .LBB52_2: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: mv a0, a1 -; CHECK-NOV-NEXT: 
bne a3, a4, .LBB52_7 -; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: bnez s1, .LBB52_8 -; CHECK-NOV-NEXT: .LBB52_4: # %entry -; CHECK-NOV-NEXT: beq s1, a4, .LBB52_6 -; CHECK-NOV-NEXT: .LBB52_5: # %entry -; CHECK-NOV-NEXT: mv a1, s0 -; CHECK-NOV-NEXT: .LBB52_6: # %entry +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: snez a1, s2 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, s1 +; CHECK-NOV-NEXT: addi a2, s2, -1 +; CHECK-NOV-NEXT: seqz a2, a2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB52_7: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: beqz s1, .LBB52_4 -; CHECK-NOV-NEXT: .LBB52_8: # %entry -; CHECK-NOV-NEXT: mv s0, a1 -; CHECK-NOV-NEXT: bne s1, a4, .LBB52_5 -; CHECK-NOV-NEXT: j .LBB52_6 ; ; CHECK-V-LABEL: utesth_f16i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6921,32 +6615,31 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a0 +; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: mv a0, s2 +; CHECK-V-NEXT: mv s1, a0 +; CHECK-V-NEXT: mv s2, a1 +; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: li a2, 0 -; CHECK-V-NEXT: beqz a1, .LBB52_2 -; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: mv a0, a2 -; CHECK-V-NEXT: .LBB52_2: # %entry -; CHECK-V-NEXT: li a4, 1 -; CHECK-V-NEXT: mv a3, a2 -; CHECK-V-NEXT: bne a1, a4, .LBB52_7 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: bnez s1, .LBB52_8 -; CHECK-V-NEXT: .LBB52_4: # %entry -; CHECK-V-NEXT: beq s1, a4, .LBB52_6 -; CHECK-V-NEXT: .LBB52_5: # %entry -; CHECK-V-NEXT: mv a2, s0 -; CHECK-V-NEXT: .LBB52_6: # %entry -; CHECK-V-NEXT: sd a2, 8(sp) -; CHECK-V-NEXT: sd a3, 0(sp) +; CHECK-V-NEXT: snez a2, a1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s2 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s1 +; CHECK-V-NEXT: addi a2, s2, -1 +; CHECK-V-NEXT: seqz a2, a2 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a1, a2, a1 +; CHECK-V-NEXT: sd a1, 8(sp) +; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v9, (a0) @@ -6960,13 +6653,6 @@ ; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB52_7: # %entry -; CHECK-V-NEXT: mv a3, a0 -; CHECK-V-NEXT: beqz s1, .LBB52_4 -; CHECK-V-NEXT: .LBB52_8: # %entry -; CHECK-V-NEXT: mv s0, a2 -; CHECK-V-NEXT: bne s1, a4, .LBB52_5 -; CHECK-V-NEXT: j .LBB52_6 entry: %conv = fptoui <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6996,74 +6682,47 @@ ; 
CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixsfti@plt ; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: li a5, 1 -; CHECK-NOV-NEXT: mv a3, a1 -; CHECK-NOV-NEXT: bgtz a1, .LBB53_12 +; CHECK-NOV-NEXT: blez a1, .LBB53_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: bgtz s1, .LBB53_13 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB53_2: # %entry -; CHECK-NOV-NEXT: bgtz a2, .LBB53_14 -; CHECK-NOV-NEXT: .LBB53_3: # %entry -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: bne a2, a5, .LBB53_15 +; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: blez s1, .LBB53_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry -; CHECK-NOV-NEXT: bgtz s1, .LBB53_16 -; CHECK-NOV-NEXT: .LBB53_5: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: bne s1, a5, .LBB53_17 +; CHECK-NOV-NEXT: sgtz a3, a1 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: addi a0, a1, -1 +; CHECK-NOV-NEXT: seqz a0, a0 +; CHECK-NOV-NEXT: addi a1, a0, -1 +; CHECK-NOV-NEXT: sgtz a0, s1 +; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi a5, s1, -1 +; CHECK-NOV-NEXT: seqz a5, a5 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 +; CHECK-NOV-NEXT: beqz a4, .LBB53_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: sgtz a4, a4 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB53_6: # %entry -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: blez a4, .LBB53_18 -; CHECK-NOV-NEXT: .LBB53_7: # %entry -; CHECK-NOV-NEXT: bnez a4, .LBB53_19 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB53_8 +; CHECK-NOV-NEXT: # %bb.7: # %entry +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB53_8: # %entry -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a3, .LBB53_20 -; CHECK-NOV-NEXT: .LBB53_9: # %entry -; CHECK-NOV-NEXT: beqz a3, .LBB53_11 -; CHECK-NOV-NEXT: .LBB53_10: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: .LBB53_11: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB53_12: # %entry -; CHECK-NOV-NEXT: li a3, 1 -; CHECK-NOV-NEXT: mv a4, s1 -; CHECK-NOV-NEXT: blez s1, .LBB53_2 -; CHECK-NOV-NEXT: .LBB53_13: # %entry -; CHECK-NOV-NEXT: li a4, 1 -; CHECK-NOV-NEXT: blez a2, .LBB53_3 -; CHECK-NOV-NEXT: .LBB53_14: # %entry -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: li a1, 0 -; CHECK-NOV-NEXT: beq a2, a5, .LBB53_4 -; CHECK-NOV-NEXT: .LBB53_15: # %entry -; CHECK-NOV-NEXT: mv a1, a0 -; CHECK-NOV-NEXT: blez s1, .LBB53_5 -; CHECK-NOV-NEXT: .LBB53_16: # %entry -; CHECK-NOV-NEXT: li s0, 0 -; CHECK-NOV-NEXT: li a0, 0 -; CHECK-NOV-NEXT: beq s1, a5, .LBB53_6 -; CHECK-NOV-NEXT: .LBB53_17: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: bgtz a4, .LBB53_7 -; CHECK-NOV-NEXT: .LBB53_18: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: beqz a4, .LBB53_8 -; CHECK-NOV-NEXT: .LBB53_19: # %entry -; CHECK-NOV-NEXT: mv a0, a2 -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: bgtz a3, .LBB53_9 -; CHECK-NOV-NEXT: .LBB53_20: # %entry -; CHECK-NOV-NEXT: li a2, 0 -; CHECK-NOV-NEXT: bnez a3, .LBB53_10 -; CHECK-NOV-NEXT: j .LBB53_11 ; ; CHECK-V-LABEL: 
ustest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -7085,36 +6744,43 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: li a5, 1 ; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: bgtz a1, .LBB53_12 +; CHECK-V-NEXT: blez a1, .LBB53_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: mv a4, s1 -; CHECK-V-NEXT: bgtz s1, .LBB53_13 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB53_2: # %entry -; CHECK-V-NEXT: bgtz a1, .LBB53_14 -; CHECK-V-NEXT: .LBB53_3: # %entry -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: bne a1, a5, .LBB53_15 +; CHECK-V-NEXT: mv a4, s1 +; CHECK-V-NEXT: blez s1, .LBB53_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a4, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry -; CHECK-V-NEXT: bgtz s1, .LBB53_16 -; CHECK-V-NEXT: .LBB53_5: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: bne s1, a5, .LBB53_17 +; CHECK-V-NEXT: sgtz a3, a1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a3, a3, a0 +; CHECK-V-NEXT: addi a0, a1, -1 +; CHECK-V-NEXT: seqz a0, a0 +; CHECK-V-NEXT: addi a1, a0, -1 +; CHECK-V-NEXT: sgtz a0, s1 +; CHECK-V-NEXT: addi a0, a0, -1 +; CHECK-V-NEXT: and a0, a0, s0 +; CHECK-V-NEXT: addi a5, s1, -1 +; CHECK-V-NEXT: seqz a5, a5 +; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: and a0, a5, a0 +; CHECK-V-NEXT: beqz a4, .LBB53_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: sgtz a4, a4 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 ; CHECK-V-NEXT: .LBB53_6: # %entry -; CHECK-V-NEXT: mv a1, a0 -; CHECK-V-NEXT: blez a4, .LBB53_18 -; CHECK-V-NEXT: .LBB53_7: # %entry -; CHECK-V-NEXT: bnez a4, .LBB53_19 +; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: beqz a2, .LBB53_8 +; CHECK-V-NEXT: # %bb.7: # %entry +; CHECK-V-NEXT: sgtz a2, a2 +; CHECK-V-NEXT: neg a2, a2 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB53_8: # %entry -; CHECK-V-NEXT: mv a1, a3 -; CHECK-V-NEXT: blez a2, .LBB53_20 -; CHECK-V-NEXT: .LBB53_9: # %entry -; CHECK-V-NEXT: beqz a2, .LBB53_11 -; CHECK-V-NEXT: .LBB53_10: # %entry -; CHECK-V-NEXT: mv a3, a1 -; CHECK-V-NEXT: .LBB53_11: # %entry -; CHECK-V-NEXT: sd a3, 8(sp) +; CHECK-V-NEXT: sd a1, 8(sp) ; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -7129,39 +6795,6 @@ ; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB53_12: # %entry -; CHECK-V-NEXT: li a2, 1 -; CHECK-V-NEXT: mv a4, s1 -; CHECK-V-NEXT: blez s1, .LBB53_2 -; CHECK-V-NEXT: .LBB53_13: # %entry -; CHECK-V-NEXT: li a4, 1 -; CHECK-V-NEXT: blez a1, .LBB53_3 -; CHECK-V-NEXT: .LBB53_14: # %entry -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: li a3, 0 -; CHECK-V-NEXT: beq a1, a5, .LBB53_4 -; CHECK-V-NEXT: .LBB53_15: # %entry -; CHECK-V-NEXT: mv a3, a0 -; CHECK-V-NEXT: blez s1, .LBB53_5 -; CHECK-V-NEXT: .LBB53_16: # %entry -; CHECK-V-NEXT: li s0, 0 -; CHECK-V-NEXT: li a0, 0 -; CHECK-V-NEXT: beq s1, a5, .LBB53_6 -; CHECK-V-NEXT: .LBB53_17: # %entry -; CHECK-V-NEXT: mv a0, s0 -; CHECK-V-NEXT: mv a1, a0 -; CHECK-V-NEXT: bgtz a4, .LBB53_7 -; CHECK-V-NEXT: .LBB53_18: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: beqz a4, .LBB53_8 -; CHECK-V-NEXT: .LBB53_19: # %entry -; CHECK-V-NEXT: mv a0, a1 -; CHECK-V-NEXT: mv a1, a3 -; CHECK-V-NEXT: bgtz a2, .LBB53_9 -; CHECK-V-NEXT: .LBB53_20: # %entry -; CHECK-V-NEXT: li a1, 0 -; CHECK-V-NEXT: bnez a2, .LBB53_10 -; CHECK-V-NEXT: j .LBB53_11 entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> 
%conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -63,66 +63,66 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_si_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IZFH-NEXT: beqz a0, .LBB1_2 -; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) -; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IZFH-NEXT: .LBB1_2: # %start +; RV32IZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_si_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IZFH-NEXT: beqz a0, .LBB1_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_1) -; RV64IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) -; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB1_2: # %start +; RV64IZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_si_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IDZFH-NEXT: beqz a0, .LBB1_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_1) -; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) -; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IDZFH-NEXT: .LBB1_2: # %start +; RV32IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_si_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IDZFH-NEXT: beqz a0, .LBB1_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_1) -; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) -; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IDZFH-NEXT: fmax.s ft0, 
ft2, ft0 +; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB1_2: # %start +; RV64IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_si_h_sat: @@ -158,12 +158,9 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bnez a1, .LBB1_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: .LBB1_6: # %start +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -204,12 +201,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB1_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: .LBB1_6: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -327,25 +321,23 @@ ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and a0, a0, s0 ; RV32I-NEXT: call __extendhfsf2@plt +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: call __fixunssfsi@plt ; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: bltz s2, .LBB3_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: .LBB3_2: # %start ; RV32I-NEXT: lui a0, 292864 ; RV32I-NEXT: addi a1, a0, -256 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bgtz a0, .LBB3_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s0, s3 -; RV32I-NEXT: .LBB3_4: # %start +; RV32I-NEXT: bgtz a0, .LBB3_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: slti a0, s2, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and s0, a0, s1 +; RV32I-NEXT: .LBB3_2: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -367,25 +359,23 @@ ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: call __extendhfsf2@plt +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: call __fixunssfdi@plt ; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s3, 0 -; RV64I-NEXT: bltz s2, .LBB3_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: .LBB3_2: # %start ; RV64I-NEXT: lui a0, 292864 ; RV64I-NEXT: addiw a1, a0, -256 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bgtz a0, .LBB3_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s0, s3 -; RV64I-NEXT: .LBB3_4: # %start +; RV64I-NEXT: bgtz a0, .LBB3_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s2, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and s0, a0, s1 +; RV64I-NEXT: .LBB3_2: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -446,29 
+436,29 @@ define i32 @fcvt_w_h_sat(half %a) nounwind { ; CHECKIZFH-LABEL: fcvt_w_h_sat: ; CHECKIZFH: # %bb.0: # %start -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB5_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz -; CHECKIZFH-NEXT: .LBB5_2: # %start +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_w_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IDZFH-NEXT: beqz a0, .LBB5_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz -; RV32IDZFH-NEXT: .LBB5_2: # %start +; RV32IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB5_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz -; RV64IDZFH-NEXT: .LBB5_2: # %start +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_w_h_sat: @@ -505,12 +495,9 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bnez a1, .LBB5_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: .LBB5_6: # %start +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -553,12 +540,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB5_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: .LBB5_6: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -692,42 +676,42 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IZFH-NEXT: beqz a0, .LBB8_2 -; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz -; RV32IZFH-NEXT: .LBB8_2: # %start +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_wu_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB8_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 ; RV64IZFH-NEXT: slli a0, a0, 32 ; RV64IZFH-NEXT: srli a0, a0, 32 -; RV64IZFH-NEXT: .LBB8_2: # %start ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_wu_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IDZFH-NEXT: beqz a0, .LBB8_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz -; RV32IDZFH-NEXT: .LBB8_2: # %start +; RV32IDZFH-NEXT: feq.h a1, fa0, fa0 +; 
RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB8_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 ; RV64IDZFH-NEXT: slli a0, a0, 32 ; RV64IDZFH-NEXT: srli a0, a0, 32 -; RV64IDZFH-NEXT: .LBB8_2: # %start ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_h_sat: @@ -775,27 +759,27 @@ ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: bltz s2, .LBB8_2 -; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: .LBB8_2: # %start ; RV64I-NEXT: lui a0, 325632 ; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: blez a0, .LBB8_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB8_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB8_3 +; RV64I-NEXT: .LBB8_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: .LBB8_4: # %start -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB8_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -890,18 +874,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB10_4: # %start ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB10_6 -; RV32IZFH-NEXT: # %bb.5: # %start -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB10_7 -; RV32IZFH-NEXT: .LBB10_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB10_7: # %start +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -910,11 +892,11 @@ ; ; RV64IZFH-LABEL: fcvt_l_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB10_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz -; RV64IZFH-NEXT: .LBB10_2: # %start +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_l_h_sat: @@ -942,18 +924,16 @@ ; RV32IDZFH-NEXT: addi a1, a3, -1 ; RV32IDZFH-NEXT: .LBB10_4: # %start ; RV32IDZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IDZFH-NEXT: bnez a3, .LBB10_6 -; RV32IDZFH-NEXT: # %bb.5: # %start -; RV32IDZFH-NEXT: li a1, 0 -; RV32IDZFH-NEXT: li a0, 0 -; RV32IDZFH-NEXT: j .LBB10_7 -; RV32IDZFH-NEXT: 
.LBB10_6: -; RV32IDZFH-NEXT: neg a3, s0 -; RV32IDZFH-NEXT: and a0, a3, a0 +; RV32IDZFH-NEXT: seqz a3, a3 +; RV32IDZFH-NEXT: addi a3, a3, -1 +; RV32IDZFH-NEXT: and a1, a3, a1 +; RV32IDZFH-NEXT: seqz a4, s0 +; RV32IDZFH-NEXT: addi a4, a4, -1 +; RV32IDZFH-NEXT: and a0, a4, a0 ; RV32IDZFH-NEXT: seqz a2, a2 ; RV32IDZFH-NEXT: addi a2, a2, -1 -; RV32IDZFH-NEXT: or a0, a0, a2 -; RV32IDZFH-NEXT: .LBB10_7: # %start +; RV32IDZFH-NEXT: or a0, a2, a0 +; RV32IDZFH-NEXT: and a0, a3, a0 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -962,11 +942,11 @@ ; ; RV64IDZFH-LABEL: fcvt_l_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB10_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz -; RV64IDZFH-NEXT: .LBB10_2: # %start +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_l_h_sat: @@ -979,63 +959,57 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, 389120 -; RV32I-NEXT: addi s2, a0, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s5, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __fixsfdi@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv s4, a1 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 913408 -; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB10_2 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixsfdi@plt +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __unordsf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: lui a0, 389120 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: bgtz a0, .LBB10_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s5, s5, s3 +; RV32I-NEXT: slti a0, s4, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and s1, s1, a0 ; RV32I-NEXT: .LBB10_2: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv s3, s0 -; RV32I-NEXT: bnez a0, .LBB10_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s3, s5 -; RV32I-NEXT: .LBB10_4: # %start ; RV32I-NEXT: lui a1, 913408 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: lui s6, 524288 ; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: bltz a0, .LBB10_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s4 -; RV32I-NEXT: .LBB10_6: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: bltz a0, .LBB10_4 +; RV32I-NEXT: # %bb.3: # %start +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: .LBB10_4: # %start +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s0, a0, .LBB10_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: addi s5, s6, -1 -; RV32I-NEXT: .LBB10_8: # %start -; RV32I-NEXT: mv a0, s1 -; 
RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bnez a0, .LBB10_10 -; RV32I-NEXT: # %bb.9: # %start -; RV32I-NEXT: mv s0, s5 -; RV32I-NEXT: .LBB10_10: # %start -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: blez a0, .LBB10_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: addi s4, s5, -1 +; RV32I-NEXT: .LBB10_6: # %start +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __unordsf2@plt +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s4 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1043,7 +1017,6 @@ ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1083,12 +1056,9 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bnez a1, .LBB10_7 -; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: .LBB10_7: # %start +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1168,7 +1138,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI12_0) @@ -1177,9 +1148,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -1188,11 +1159,11 @@ ; ; RV64IZFH-LABEL: fcvt_lu_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB12_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz -; RV64IZFH-NEXT: .LBB12_2: # %start +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_lu_h_sat: @@ -1204,7 +1175,8 @@ ; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IDZFH-NEXT: fmv.w.x ft0, zero ; RV32IDZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IDZFH-NEXT: neg s0, a0 +; RV32IDZFH-NEXT: seqz a0, a0 +; RV32IDZFH-NEXT: addi s0, a0, -1 ; RV32IDZFH-NEXT: fmv.s fa0, fs0 ; RV32IDZFH-NEXT: call __fixunssfdi@plt ; RV32IDZFH-NEXT: lui a2, %hi(.LCPI12_0) @@ -1213,9 +1185,9 @@ ; RV32IDZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IDZFH-NEXT: seqz a2, a2 ; RV32IDZFH-NEXT: addi a2, a2, -1 -; RV32IDZFH-NEXT: or a0, a0, a2 +; RV32IDZFH-NEXT: or a0, a2, a0 ; RV32IDZFH-NEXT: and a1, s0, a1 -; RV32IDZFH-NEXT: or a1, a1, a2 +; RV32IDZFH-NEXT: or a1, a2, a1 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -1224,11 +1196,11 @@ ; ; RV64IDZFH-LABEL: 
fcvt_lu_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB12_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz -; RV64IDZFH-NEXT: .LBB12_2: # %start +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_lu_h_sat: @@ -2234,66 +2206,66 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IZFH-NEXT: beqz a0, .LBB32_2 -; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0) ; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_1) -; RV32IZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0) -; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IZFH-NEXT: .LBB32_2: # %start +; RV32IZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IZFH-NEXT: beqz a0, .LBB32_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV64IZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0) ; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_1) -; RV64IZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0) -; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB32_2: # %start +; RV64IZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IDZFH-NEXT: beqz a0, .LBB32_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0) ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_1) -; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0) -; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IDZFH-NEXT: .LBB32_2: # %start +; RV32IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IDZFH-NEXT: beqz a0, .LBB32_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI32_0)(a0) ; 
RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_1) -; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI32_1)(a0) -; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IDZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB32_2: # %start +; RV64IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -2329,12 +2301,10 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a0, .LBB32_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: .LBB32_6: # %start -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2376,12 +2346,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB32_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB32_6: # %start -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -2496,30 +2464,28 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s2, a1, -1 -; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __extendhfsf2@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: call __fixunssfsi@plt ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: bltz s1, .LBB34_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: .LBB34_2: # %start ; RV32I-NEXT: lui a0, 292864 ; RV32I-NEXT: addi a1, a0, -256 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: bgtz a0, .LBB34_4 -; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: .LBB34_4: # %start -; RV32I-NEXT: and a0, a1, s2 +; RV32I-NEXT: bgtz a0, .LBB34_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: slti a0, s1, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: .LBB34_2: # %start +; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -2537,30 +2503,28 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s2, a1, -1 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __extendhfsf2@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: call __fixunssfdi@plt ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt ; 
RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s3, 0 -; RV64I-NEXT: bltz s1, .LBB34_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: .LBB34_2: # %start ; RV64I-NEXT: lui a0, 292864 ; RV64I-NEXT: addiw a1, a0, -256 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: bgtz a0, .LBB34_4 -; RV64I-NEXT: # %bb.3: # %start ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: .LBB34_4: # %start -; RV64I-NEXT: and a0, a1, s2 +; RV64I-NEXT: bgtz a0, .LBB34_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s1, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: .LBB34_2: # %start +; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2624,66 +2588,66 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_s_sat_i8: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IZFH-NEXT: beqz a0, .LBB36_2 -; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: lui a0, %hi(.LCPI36_0) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0) ; RV32IZFH-NEXT: lui a0, %hi(.LCPI36_1) -; RV32IZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0) -; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IZFH-NEXT: .LBB36_2: # %start +; RV32IZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_w_s_sat_i8: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IZFH-NEXT: beqz a0, .LBB36_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: lui a0, %hi(.LCPI36_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV64IZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0) ; RV64IZFH-NEXT: lui a0, %hi(.LCPI36_1) -; RV64IZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0) -; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV64IZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB36_2: # %start +; RV64IZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_w_s_sat_i8: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV32IDZFH-NEXT: beqz a0, .LBB36_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI36_0) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0) ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI36_1) -; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0) -; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV32IDZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IDZFH-NEXT: .LBB36_2: # %start +; 
RV32IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_s_sat_i8: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: feq.s a0, ft0, ft0 -; RV64IDZFH-NEXT: beqz a0, .LBB36_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI36_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV64IDZFH-NEXT: flw ft0, %lo(.LCPI36_0)(a0) ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI36_1) -; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI36_1)(a0) -; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1 -; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2 +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft2, fa0 +; RV64IDZFH-NEXT: fmax.s ft0, ft2, ft0 +; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft1 ; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB36_2: # %start +; RV64IDZFH-NEXT: feq.s a1, ft2, ft2 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i8: @@ -2718,12 +2682,10 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a0, .LBB36_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: .LBB36_6: # %start -; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2764,12 +2726,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB36_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: .LBB36_6: # %start -; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -2886,25 +2846,23 @@ ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bltz s1, .LBB38_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: .LBB38_2: # %start +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a1, 276464 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: li a1, 255 -; RV32I-NEXT: bgtz a0, .LBB38_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: .LBB38_4: # %start +; RV32I-NEXT: bgtz a0, .LBB38_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: slti a0, s0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: .LBB38_2: # %start ; RV32I-NEXT: andi a0, a1, 255 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2923,25 +2881,23 @@ ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s1, a0 -; 
RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB38_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB38_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a1, 276464 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: li a1, 255 -; RV64I-NEXT: bgtz a0, .LBB38_4 -; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB38_4: # %start +; RV64I-NEXT: bgtz a0, .LBB38_2 +; RV64I-NEXT: # %bb.1: # %start +; RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: .LBB38_2: # %start ; RV64I-NEXT: andi a0, a1, 255 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -2958,42 +2914,42 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_h_sat_zext: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IZFH-NEXT: beqz a0, .LBB39_2 -; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz -; RV32IZFH-NEXT: .LBB39_2: # %start +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_wu_h_sat_zext: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB39_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 ; RV64IZFH-NEXT: slli a0, a0, 32 ; RV64IZFH-NEXT: srli a0, a0, 32 -; RV64IZFH-NEXT: .LBB39_2: # %start ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_wu_h_sat_zext: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IDZFH-NEXT: beqz a0, .LBB39_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz -; RV32IDZFH-NEXT: .LBB39_2: # %start +; RV32IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_h_sat_zext: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB39_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 ; RV64IDZFH-NEXT: slli a0, a0, 32 ; RV64IDZFH-NEXT: srli a0, a0, 32 -; RV64IDZFH-NEXT: .LBB39_2: # %start ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_wu_h_sat_zext: @@ -3041,27 +2997,28 @@ ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: bltz s1, .LBB39_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: .LBB39_2: # %start +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 325632 ; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: blez a0, .LBB39_4 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: bgtz a0, .LBB39_2 +; RV64I-NEXT: # %bb.1: # %start +; 
RV64I-NEXT: slti a0, s0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: j .LBB39_3 +; RV64I-NEXT: .LBB39_2: ; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: srli s2, a0, 32 -; RV64I-NEXT: .LBB39_4: # %start -; RV64I-NEXT: slli a0, s2, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: .LBB39_3: # %start +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3077,29 +3034,29 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind { ; CHECKIZFH-LABEL: fcvt_w_h_sat_sext: ; CHECKIZFH: # %bb.0: # %start -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB40_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz -; CHECKIZFH-NEXT: .LBB40_2: # %start +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_w_h_sat_sext: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV32IDZFH-NEXT: beqz a0, .LBB40_2 -; RV32IDZFH-NEXT: # %bb.1: ; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz -; RV32IDZFH-NEXT: .LBB40_2: # %start +; RV32IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IDZFH-NEXT: seqz a1, a1 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a1, a0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_h_sat_sext: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IDZFH-NEXT: beqz a0, .LBB40_2 -; RV64IDZFH-NEXT: # %bb.1: ; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz -; RV64IDZFH-NEXT: .LBB40_2: # %start +; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IDZFH-NEXT: seqz a1, a1 +; RV64IDZFH-NEXT: addi a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a1, a0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: fcvt_w_h_sat_sext: @@ -3136,12 +3093,9 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bnez a1, .LBB40_6 -; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: .LBB40_6: # %start +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3184,12 +3138,10 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: bnez a0, .LBB40_6 -; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: .LBB40_6: # %start -; RV64I-NEXT: sext.w a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -7,11 +7,11 @@ define signext i32 @test_floor_si32(half %x) { ; CHECKIZFH-LABEL: test_floor_si32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB0_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rdn -; CHECKIZFH-NEXT: .LBB0_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, 
a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) @@ -47,18 +47,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB1_4: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB1_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB1_7 -; RV32IZFH-NEXT: .LBB1_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB1_7: +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -67,11 +65,11 @@ ; ; RV64IZFH-LABEL: test_floor_si64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB1_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn -; RV64IZFH-NEXT: .LBB1_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) @@ -81,11 +79,11 @@ define signext i32 @test_floor_ui32(half %x) { ; CHECKIZFH-LABEL: test_floor_ui32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB2_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rdn -; CHECKIZFH-NEXT: .LBB2_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -105,7 +103,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_0) @@ -114,9 +113,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -125,11 +124,11 @@ ; ; RV64IZFH-LABEL: test_floor_ui64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB3_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn -; RV64IZFH-NEXT: .LBB3_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) @@ -139,11 +138,11 @@ define signext i32 @test_ceil_si32(half %x) { ; CHECKIZFH-LABEL: test_ceil_si32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB4_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, 
fa0, rup -; CHECKIZFH-NEXT: .LBB4_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) @@ -179,18 +178,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB5_4: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB5_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB5_7 -; RV32IZFH-NEXT: .LBB5_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB5_7: +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -199,11 +196,11 @@ ; ; RV64IZFH-LABEL: test_ceil_si64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB5_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup -; RV64IZFH-NEXT: .LBB5_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) @@ -213,11 +210,11 @@ define signext i32 @test_ceil_ui32(half %x) { ; CHECKIZFH-LABEL: test_ceil_ui32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB6_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rup -; CHECKIZFH-NEXT: .LBB6_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -237,7 +234,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_0) @@ -246,9 +244,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -257,11 +255,11 @@ ; ; RV64IZFH-LABEL: test_ceil_ui64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB7_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup -; RV64IZFH-NEXT: .LBB7_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) @@ -271,11 +269,11 @@ define signext i32 @test_trunc_si32(half %x) { ; CHECKIZFH-LABEL: 
test_trunc_si32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB8_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz -; CHECKIZFH-NEXT: .LBB8_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) @@ -311,18 +309,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB9_4: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB9_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB9_7 -; RV32IZFH-NEXT: .LBB9_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB9_7: +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -331,11 +327,11 @@ ; ; RV64IZFH-LABEL: test_trunc_si64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB9_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz -; RV64IZFH-NEXT: .LBB9_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) @@ -345,11 +341,11 @@ define signext i32 @test_trunc_ui32(half %x) { ; CHECKIZFH-LABEL: test_trunc_ui32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB10_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rtz -; CHECKIZFH-NEXT: .LBB10_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -369,7 +365,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_0) @@ -378,9 +375,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -389,11 +386,11 @@ ; ; RV64IZFH-LABEL: test_trunc_ui64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB11_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz -; RV64IZFH-NEXT: .LBB11_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: 
ret %a = call half @llvm.trunc.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) @@ -403,11 +400,11 @@ define signext i32 @test_round_si32(half %x) { ; CHECKIZFH-LABEL: test_round_si32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB12_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rmm -; CHECKIZFH-NEXT: .LBB12_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) @@ -443,18 +440,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB13_4: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB13_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB13_7 -; RV32IZFH-NEXT: .LBB13_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB13_7: +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -463,11 +458,11 @@ ; ; RV64IZFH-LABEL: test_round_si64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB13_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm -; RV64IZFH-NEXT: .LBB13_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) @@ -477,11 +472,11 @@ define signext i32 @test_round_ui32(half %x) { ; CHECKIZFH-LABEL: test_round_ui32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB14_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rmm -; CHECKIZFH-NEXT: .LBB14_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -501,7 +496,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_0) @@ -510,9 +506,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -521,11 +517,11 @@ ; ; RV64IZFH-LABEL: test_round_ui64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB15_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, 
fa0, rmm -; RV64IZFH-NEXT: .LBB15_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) @@ -535,11 +531,11 @@ define signext i32 @test_roundeven_si32(half %x) { ; CHECKIZFH-LABEL: test_roundeven_si32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB16_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rne -; CHECKIZFH-NEXT: .LBB16_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) @@ -575,18 +571,16 @@ ; RV32IZFH-NEXT: addi a1, a3, -1 ; RV32IZFH-NEXT: .LBB17_4: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: bnez a3, .LBB17_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: li a1, 0 -; RV32IZFH-NEXT: li a0, 0 -; RV32IZFH-NEXT: j .LBB17_7 -; RV32IZFH-NEXT: .LBB17_6: -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: seqz a3, a3 +; RV32IZFH-NEXT: addi a3, a3, -1 +; RV32IZFH-NEXT: and a1, a3, a1 +; RV32IZFH-NEXT: seqz a4, s0 +; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 -; RV32IZFH-NEXT: .LBB17_7: +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -595,11 +589,11 @@ ; ; RV64IZFH-LABEL: test_roundeven_si64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB17_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne -; RV64IZFH-NEXT: .LBB17_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) @@ -609,11 +603,11 @@ define signext i32 @test_roundeven_ui32(half %x) { ; CHECKIZFH-LABEL: test_roundeven_ui32: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 -; CHECKIZFH-NEXT: beqz a0, .LBB18_2 -; CHECKIZFH-NEXT: # %bb.1: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rne -; CHECKIZFH-NEXT: .LBB18_2: +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -633,7 +627,8 @@ ; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero ; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: addi s0, a0, -1 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt ; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_0) @@ -642,9 +637,9 @@ ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 ; RV32IZFH-NEXT: seqz a2, a2 ; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a0, a2 +; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a1, a2 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte 
Folded Reload @@ -653,11 +648,11 @@ ; ; RV64IZFH-LABEL: test_roundeven_ui64: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa0 -; RV64IZFH-NEXT: beqz a0, .LBB19_2 -; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne -; RV64IZFH-NEXT: .LBB19_2: +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -83,44 +83,44 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: mv a4, a1 -; RV32I-NEXT: bltz a5, .LBB2_2 +; RV32I-NEXT: sll a4, a0, a2 +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: slti a5, a3, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: bltz a3, .LBB2_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, a5 +; RV32I-NEXT: sll a3, a0, a3 ; RV32I-NEXT: j .LBB2_3 ; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: sll a1, a4, a2 -; RV32I-NEXT: xori a3, a2, 31 -; RV32I-NEXT: srli a6, a0, 1 -; RV32I-NEXT: srl a3, a6, a3 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sll a3, a1, a2 +; RV32I-NEXT: xori a6, a2, 31 +; RV32I-NEXT: srli a7, a0, 1 +; RV32I-NEXT: srl a6, a7, a6 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: srl a3, a4, a6 -; RV32I-NEXT: bltz a7, .LBB2_6 +; RV32I-NEXT: and a4, a5, a4 +; RV32I-NEXT: neg a7, a2 +; RV32I-NEXT: li a5, 32 +; RV32I-NEXT: sub a6, a5, a2 +; RV32I-NEXT: srl a5, a1, a7 +; RV32I-NEXT: bltz a6, .LBB2_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: bltz a5, .LBB2_7 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: j .LBB2_6 ; RV32I-NEXT: .LBB2_5: -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB2_6: -; RV32I-NEXT: srl a6, a0, a6 +; RV32I-NEXT: srl a0, a0, a7 ; RV32I-NEXT: li a7, 64 -; RV32I-NEXT: sub a7, a7, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: slli a4, a4, 1 -; RV32I-NEXT: sll a4, a4, a7 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: bgez a5, .LBB2_5 -; RV32I-NEXT: .LBB2_7: -; RV32I-NEXT: sll a0, a0, a2 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: xori a2, a2, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: .LBB2_6: +; RV32I-NEXT: slti a1, a6, 0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64: @@ -133,44 +133,44 @@ ; ; RV32ZBB-LABEL: rotl_64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: mv a4, a1 -; RV32ZBB-NEXT: bltz a5, .LBB2_2 +; RV32ZBB-NEXT: sll a4, a0, a2 +; RV32ZBB-NEXT: addi a3, a2, -32 +; RV32ZBB-NEXT: slti a5, a3, 0 +; RV32ZBB-NEXT: neg a5, a5 +; RV32ZBB-NEXT: bltz a3, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sll a1, a0, a5 +; RV32ZBB-NEXT: sll a3, a0, a3 ; RV32ZBB-NEXT: j .LBB2_3 ; RV32ZBB-NEXT: .LBB2_2: -; RV32ZBB-NEXT: sll a1, a4, a2 -; RV32ZBB-NEXT: xori a3, a2, 31 -; RV32ZBB-NEXT: srli a6, a0, 1 -; RV32ZBB-NEXT: srl a3, a6, a3 -; RV32ZBB-NEXT: or a1, a1, a3 +; RV32ZBB-NEXT: sll a3, a1, a2 +; RV32ZBB-NEXT: xori a6, a2, 31 +; RV32ZBB-NEXT: srli a7, a0, 1 +; RV32ZBB-NEXT: srl a6, a7, a6 +; 
RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB2_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: li a3, 32 -; RV32ZBB-NEXT: sub a7, a3, a2 -; RV32ZBB-NEXT: srl a3, a4, a6 -; RV32ZBB-NEXT: bltz a7, .LBB2_6 +; RV32ZBB-NEXT: and a4, a5, a4 +; RV32ZBB-NEXT: neg a7, a2 +; RV32ZBB-NEXT: li a5, 32 +; RV32ZBB-NEXT: sub a6, a5, a2 +; RV32ZBB-NEXT: srl a5, a1, a7 +; RV32ZBB-NEXT: bltz a6, .LBB2_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: bltz a5, .LBB2_7 +; RV32ZBB-NEXT: mv a0, a5 +; RV32ZBB-NEXT: j .LBB2_6 ; RV32ZBB-NEXT: .LBB2_5: -; RV32ZBB-NEXT: mv a0, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB2_6: -; RV32ZBB-NEXT: srl a6, a0, a6 +; RV32ZBB-NEXT: srl a0, a0, a7 ; RV32ZBB-NEXT: li a7, 64 -; RV32ZBB-NEXT: sub a7, a7, a2 -; RV32ZBB-NEXT: xori a7, a7, 31 -; RV32ZBB-NEXT: slli a4, a4, 1 -; RV32ZBB-NEXT: sll a4, a4, a7 -; RV32ZBB-NEXT: or a4, a6, a4 -; RV32ZBB-NEXT: or a1, a1, a3 -; RV32ZBB-NEXT: mv a3, a4 -; RV32ZBB-NEXT: bgez a5, .LBB2_5 -; RV32ZBB-NEXT: .LBB2_7: -; RV32ZBB-NEXT: sll a0, a0, a2 -; RV32ZBB-NEXT: or a3, a3, a0 -; RV32ZBB-NEXT: mv a0, a3 +; RV32ZBB-NEXT: sub a2, a7, a2 +; RV32ZBB-NEXT: xori a2, a2, 31 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: sll a1, a1, a2 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: .LBB2_6: +; RV32ZBB-NEXT: slti a1, a6, 0 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: or a1, a3, a1 +; RV32ZBB-NEXT: or a0, a4, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64: @@ -187,44 +187,44 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: bltz a5, .LBB3_2 +; RV32I-NEXT: srl a4, a1, a2 +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: slti a5, a3, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: bltz a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a1, a5 +; RV32I-NEXT: srl a3, a1, a3 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a0, a4, a2 -; RV32I-NEXT: xori a3, a2, 31 -; RV32I-NEXT: slli a6, a1, 1 -; RV32I-NEXT: sll a3, a6, a3 -; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: srl a3, a0, a2 +; RV32I-NEXT: xori a6, a2, 31 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: sll a3, a4, a6 -; RV32I-NEXT: bltz a7, .LBB3_6 +; RV32I-NEXT: and a4, a5, a4 +; RV32I-NEXT: neg a7, a2 +; RV32I-NEXT: li a5, 32 +; RV32I-NEXT: sub a6, a5, a2 +; RV32I-NEXT: sll a5, a0, a7 +; RV32I-NEXT: bltz a6, .LBB3_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: bltz a5, .LBB3_7 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: j .LBB3_6 ; RV32I-NEXT: .LBB3_5: -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB3_6: -; RV32I-NEXT: sll a6, a1, a6 +; RV32I-NEXT: sll a1, a1, a7 ; RV32I-NEXT: li a7, 64 -; RV32I-NEXT: sub a7, a7, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli a4, a4, 1 -; RV32I-NEXT: srl a4, a4, a7 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: bgez a5, .LBB3_5 -; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: xori a2, a2, 31 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: .LBB3_6: +; RV32I-NEXT: slti a0, a6, 0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64: 
@@ -237,44 +237,44 @@ ; ; RV32ZBB-LABEL: rotr_64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bltz a5, .LBB3_2 +; RV32ZBB-NEXT: srl a4, a1, a2 +; RV32ZBB-NEXT: addi a3, a2, -32 +; RV32ZBB-NEXT: slti a5, a3, 0 +; RV32ZBB-NEXT: neg a5, a5 +; RV32ZBB-NEXT: bltz a3, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: srl a0, a1, a5 +; RV32ZBB-NEXT: srl a3, a1, a3 ; RV32ZBB-NEXT: j .LBB3_3 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: srl a0, a4, a2 -; RV32ZBB-NEXT: xori a3, a2, 31 -; RV32ZBB-NEXT: slli a6, a1, 1 -; RV32ZBB-NEXT: sll a3, a6, a3 -; RV32ZBB-NEXT: or a0, a0, a3 +; RV32ZBB-NEXT: srl a3, a0, a2 +; RV32ZBB-NEXT: xori a6, a2, 31 +; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: sll a6, a7, a6 +; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB3_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: li a3, 32 -; RV32ZBB-NEXT: sub a7, a3, a2 -; RV32ZBB-NEXT: sll a3, a4, a6 -; RV32ZBB-NEXT: bltz a7, .LBB3_6 +; RV32ZBB-NEXT: and a4, a5, a4 +; RV32ZBB-NEXT: neg a7, a2 +; RV32ZBB-NEXT: li a5, 32 +; RV32ZBB-NEXT: sub a6, a5, a2 +; RV32ZBB-NEXT: sll a5, a0, a7 +; RV32ZBB-NEXT: bltz a6, .LBB3_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: bltz a5, .LBB3_7 +; RV32ZBB-NEXT: mv a1, a5 +; RV32ZBB-NEXT: j .LBB3_6 ; RV32ZBB-NEXT: .LBB3_5: -; RV32ZBB-NEXT: mv a1, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB3_6: -; RV32ZBB-NEXT: sll a6, a1, a6 +; RV32ZBB-NEXT: sll a1, a1, a7 ; RV32ZBB-NEXT: li a7, 64 -; RV32ZBB-NEXT: sub a7, a7, a2 -; RV32ZBB-NEXT: xori a7, a7, 31 -; RV32ZBB-NEXT: srli a4, a4, 1 -; RV32ZBB-NEXT: srl a4, a4, a7 -; RV32ZBB-NEXT: or a4, a6, a4 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: mv a3, a4 -; RV32ZBB-NEXT: bgez a5, .LBB3_5 -; RV32ZBB-NEXT: .LBB3_7: -; RV32ZBB-NEXT: srl a1, a1, a2 -; RV32ZBB-NEXT: or a3, a3, a1 -; RV32ZBB-NEXT: mv a1, a3 +; RV32ZBB-NEXT: sub a2, a7, a2 +; RV32ZBB-NEXT: xori a2, a2, 31 +; RV32ZBB-NEXT: srli a0, a0, 1 +; RV32ZBB-NEXT: srl a0, a0, a2 +; RV32ZBB-NEXT: or a1, a1, a0 +; RV32ZBB-NEXT: .LBB3_6: +; RV32ZBB-NEXT: slti a0, a6, 0 +; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a1, a4, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_64: @@ -503,41 +503,42 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bltz a5, .LBB10_2 +; RV32I-NEXT: addi a4, a2, -32 +; RV32I-NEXT: bltz a4, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, a5 +; RV32I-NEXT: sll a3, a0, a4 ; RV32I-NEXT: j .LBB10_3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: sll a1, a3, a2 -; RV32I-NEXT: xori a4, a2, 31 +; RV32I-NEXT: sll a3, a1, a2 +; RV32I-NEXT: xori a5, a2, 31 ; RV32I-NEXT: srli a6, a0, 1 -; RV32I-NEXT: srl a4, a6, a4 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: srl a5, a6, a5 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi a4, a6, 63 -; RV32I-NEXT: addi a7, a4, -32 +; RV32I-NEXT: sll a5, a0, a2 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: srl a2, a1, a5 +; RV32I-NEXT: andi a6, a5, 63 +; RV32I-NEXT: addi a7, a6, -32 +; RV32I-NEXT: slti t0, a7, 0 +; RV32I-NEXT: neg t0, t0 +; RV32I-NEXT: and a2, t0, a2 ; RV32I-NEXT: bltz a7, .LBB10_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a3, a7 -; RV32I-NEXT: bltz a5, .LBB10_6 -; RV32I-NEXT: j .LBB10_7 +; RV32I-NEXT: srl a0, a1, a7 +; RV32I-NEXT: j .LBB10_6 ; RV32I-NEXT: .LBB10_5: -; 
RV32I-NEXT: srl a7, a0, a6 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: slli t0, a3, 1 -; RV32I-NEXT: sll a4, t0, a4 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: srl a3, a3, a6 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: bgez a5, .LBB10_7 +; RV32I-NEXT: srl a0, a0, a5 +; RV32I-NEXT: xori a5, a6, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a5 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: sll a0, a0, a2 -; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: .LBB10_7: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: or a1, a3, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask: @@ -550,41 +551,42 @@ ; ; RV32ZBB-LABEL: rotl_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: mv a3, a1 -; RV32ZBB-NEXT: bltz a5, .LBB10_2 +; RV32ZBB-NEXT: addi a4, a2, -32 +; RV32ZBB-NEXT: bltz a4, .LBB10_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sll a1, a0, a5 +; RV32ZBB-NEXT: sll a3, a0, a4 ; RV32ZBB-NEXT: j .LBB10_3 ; RV32ZBB-NEXT: .LBB10_2: -; RV32ZBB-NEXT: sll a1, a3, a2 -; RV32ZBB-NEXT: xori a4, a2, 31 +; RV32ZBB-NEXT: sll a3, a1, a2 +; RV32ZBB-NEXT: xori a5, a2, 31 ; RV32ZBB-NEXT: srli a6, a0, 1 -; RV32ZBB-NEXT: srl a4, a6, a4 -; RV32ZBB-NEXT: or a1, a1, a4 +; RV32ZBB-NEXT: srl a5, a6, a5 +; RV32ZBB-NEXT: or a3, a3, a5 ; RV32ZBB-NEXT: .LBB10_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi a4, a6, 63 -; RV32ZBB-NEXT: addi a7, a4, -32 +; RV32ZBB-NEXT: sll a5, a0, a2 +; RV32ZBB-NEXT: slti a4, a4, 0 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: and a4, a4, a5 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: srl a2, a1, a5 +; RV32ZBB-NEXT: andi a6, a5, 63 +; RV32ZBB-NEXT: addi a7, a6, -32 +; RV32ZBB-NEXT: slti t0, a7, 0 +; RV32ZBB-NEXT: neg t0, t0 +; RV32ZBB-NEXT: and a2, t0, a2 ; RV32ZBB-NEXT: bltz a7, .LBB10_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a4, a3, a7 -; RV32ZBB-NEXT: bltz a5, .LBB10_6 -; RV32ZBB-NEXT: j .LBB10_7 +; RV32ZBB-NEXT: srl a0, a1, a7 +; RV32ZBB-NEXT: j .LBB10_6 ; RV32ZBB-NEXT: .LBB10_5: -; RV32ZBB-NEXT: srl a7, a0, a6 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: slli t0, a3, 1 -; RV32ZBB-NEXT: sll a4, t0, a4 -; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: srl a3, a3, a6 -; RV32ZBB-NEXT: or a1, a1, a3 -; RV32ZBB-NEXT: bgez a5, .LBB10_7 +; RV32ZBB-NEXT: srl a0, a0, a5 +; RV32ZBB-NEXT: xori a5, a6, 31 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: sll a1, a1, a5 +; RV32ZBB-NEXT: or a0, a0, a1 ; RV32ZBB-NEXT: .LBB10_6: -; RV32ZBB-NEXT: sll a0, a0, a2 -; RV32ZBB-NEXT: or a4, a4, a0 -; RV32ZBB-NEXT: .LBB10_7: -; RV32ZBB-NEXT: mv a0, a4 +; RV32ZBB-NEXT: or a0, a4, a0 +; RV32ZBB-NEXT: or a1, a3, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask: @@ -602,42 +604,43 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask_and_127_and_63: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a4, a2, 127 -; RV32I-NEXT: addi a5, a4, -32 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bltz a5, .LBB11_2 +; RV32I-NEXT: andi a3, a2, 127 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: bltz a4, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, a5 +; RV32I-NEXT: sll a3, a0, a4 ; RV32I-NEXT: j .LBB11_3 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sll a1, a3, a2 +; RV32I-NEXT: sll a5, a1, a2 ; RV32I-NEXT: srli a6, a0, 1 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: srl a4, a6, a4 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: xori a3, a3, 31 +; RV32I-NEXT: srl a3, a6, a3 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi a4, a6, 63 -; RV32I-NEXT: addi a7, 
a4, -32 +; RV32I-NEXT: sll a5, a0, a2 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: srl a2, a1, a5 +; RV32I-NEXT: andi a6, a5, 63 +; RV32I-NEXT: addi a7, a6, -32 +; RV32I-NEXT: slti t0, a7, 0 +; RV32I-NEXT: neg t0, t0 +; RV32I-NEXT: and a2, t0, a2 ; RV32I-NEXT: bltz a7, .LBB11_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a3, a7 -; RV32I-NEXT: bltz a5, .LBB11_6 -; RV32I-NEXT: j .LBB11_7 +; RV32I-NEXT: srl a0, a1, a7 +; RV32I-NEXT: j .LBB11_6 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: srl a7, a0, a6 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: slli t0, a3, 1 -; RV32I-NEXT: sll a4, t0, a4 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: srl a3, a3, a6 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: bgez a5, .LBB11_7 +; RV32I-NEXT: srl a0, a0, a5 +; RV32I-NEXT: xori a5, a6, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a5 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: sll a0, a0, a2 -; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: or a1, a3, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_and_127_and_63: @@ -650,42 +653,43 @@ ; ; RV32ZBB-LABEL: rotl_64_mask_and_127_and_63: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a4, a2, 127 -; RV32ZBB-NEXT: addi a5, a4, -32 -; RV32ZBB-NEXT: mv a3, a1 -; RV32ZBB-NEXT: bltz a5, .LBB11_2 +; RV32ZBB-NEXT: andi a3, a2, 127 +; RV32ZBB-NEXT: addi a4, a3, -32 +; RV32ZBB-NEXT: bltz a4, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sll a1, a0, a5 +; RV32ZBB-NEXT: sll a3, a0, a4 ; RV32ZBB-NEXT: j .LBB11_3 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sll a1, a3, a2 +; RV32ZBB-NEXT: sll a5, a1, a2 ; RV32ZBB-NEXT: srli a6, a0, 1 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: srl a4, a6, a4 -; RV32ZBB-NEXT: or a1, a1, a4 +; RV32ZBB-NEXT: xori a3, a3, 31 +; RV32ZBB-NEXT: srl a3, a6, a3 +; RV32ZBB-NEXT: or a3, a5, a3 ; RV32ZBB-NEXT: .LBB11_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi a4, a6, 63 -; RV32ZBB-NEXT: addi a7, a4, -32 +; RV32ZBB-NEXT: sll a5, a0, a2 +; RV32ZBB-NEXT: slti a4, a4, 0 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: and a4, a4, a5 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: srl a2, a1, a5 +; RV32ZBB-NEXT: andi a6, a5, 63 +; RV32ZBB-NEXT: addi a7, a6, -32 +; RV32ZBB-NEXT: slti t0, a7, 0 +; RV32ZBB-NEXT: neg t0, t0 +; RV32ZBB-NEXT: and a2, t0, a2 ; RV32ZBB-NEXT: bltz a7, .LBB11_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a4, a3, a7 -; RV32ZBB-NEXT: bltz a5, .LBB11_6 -; RV32ZBB-NEXT: j .LBB11_7 +; RV32ZBB-NEXT: srl a0, a1, a7 +; RV32ZBB-NEXT: j .LBB11_6 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: srl a7, a0, a6 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: slli t0, a3, 1 -; RV32ZBB-NEXT: sll a4, t0, a4 -; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: srl a3, a3, a6 -; RV32ZBB-NEXT: or a1, a1, a3 -; RV32ZBB-NEXT: bgez a5, .LBB11_7 +; RV32ZBB-NEXT: srl a0, a0, a5 +; RV32ZBB-NEXT: xori a5, a6, 31 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: sll a1, a1, a5 +; RV32ZBB-NEXT: or a0, a0, a1 ; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: sll a0, a0, a2 -; RV32ZBB-NEXT: or a4, a4, a0 -; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: mv a0, a4 +; RV32ZBB-NEXT: or a0, a4, a0 +; RV32ZBB-NEXT: or a1, a3, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63: @@ -744,41 +748,42 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: bltz a5, 
.LBB13_2 +; RV32I-NEXT: srl a4, a1, a2 +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: slti a5, a3, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: bltz a3, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a1, a5 +; RV32I-NEXT: srl a3, a1, a3 ; RV32I-NEXT: j .LBB13_3 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: srl a0, a3, a2 -; RV32I-NEXT: xori a4, a2, 31 -; RV32I-NEXT: slli a6, a1, 1 -; RV32I-NEXT: sll a4, a6, a4 -; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srl a3, a0, a2 +; RV32I-NEXT: xori a6, a2, 31 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB13_3: ; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi a4, a6, 63 -; RV32I-NEXT: addi a7, a4, -32 +; RV32I-NEXT: andi t0, a6, 63 +; RV32I-NEXT: addi a7, t0, -32 +; RV32I-NEXT: and a2, a5, a4 ; RV32I-NEXT: bltz a7, .LBB13_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a4, a3, a7 -; RV32I-NEXT: bltz a5, .LBB13_6 -; RV32I-NEXT: j .LBB13_7 +; RV32I-NEXT: sll a1, a0, a7 +; RV32I-NEXT: j .LBB13_6 ; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: sll a7, a1, a6 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: srl a4, t0, a4 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: bgez a5, .LBB13_7 +; RV32I-NEXT: sll a1, a1, a6 +; RV32I-NEXT: xori a4, t0, 31 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: .LBB13_6: -; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a4, a4, a1 -; RV32I-NEXT: .LBB13_7: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: sll a0, a0, a6 +; RV32I-NEXT: slti a4, a7, 0 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64_mask: @@ -791,41 +796,42 @@ ; ; RV32ZBB-LABEL: rotr_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: bltz a5, .LBB13_2 +; RV32ZBB-NEXT: srl a4, a1, a2 +; RV32ZBB-NEXT: addi a3, a2, -32 +; RV32ZBB-NEXT: slti a5, a3, 0 +; RV32ZBB-NEXT: neg a5, a5 +; RV32ZBB-NEXT: bltz a3, .LBB13_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: srl a0, a1, a5 +; RV32ZBB-NEXT: srl a3, a1, a3 ; RV32ZBB-NEXT: j .LBB13_3 ; RV32ZBB-NEXT: .LBB13_2: -; RV32ZBB-NEXT: srl a0, a3, a2 -; RV32ZBB-NEXT: xori a4, a2, 31 -; RV32ZBB-NEXT: slli a6, a1, 1 -; RV32ZBB-NEXT: sll a4, a6, a4 -; RV32ZBB-NEXT: or a0, a0, a4 +; RV32ZBB-NEXT: srl a3, a0, a2 +; RV32ZBB-NEXT: xori a6, a2, 31 +; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: sll a6, a7, a6 +; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB13_3: ; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi a4, a6, 63 -; RV32ZBB-NEXT: addi a7, a4, -32 +; RV32ZBB-NEXT: andi t0, a6, 63 +; RV32ZBB-NEXT: addi a7, t0, -32 +; RV32ZBB-NEXT: and a2, a5, a4 ; RV32ZBB-NEXT: bltz a7, .LBB13_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a4, a3, a7 -; RV32ZBB-NEXT: bltz a5, .LBB13_6 -; RV32ZBB-NEXT: j .LBB13_7 +; RV32ZBB-NEXT: sll a1, a0, a7 +; RV32ZBB-NEXT: j .LBB13_6 ; RV32ZBB-NEXT: .LBB13_5: -; RV32ZBB-NEXT: sll a7, a1, a6 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: srli t0, a3, 1 -; RV32ZBB-NEXT: srl a4, t0, a4 -; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: sll a3, a3, a6 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: bgez a5, .LBB13_7 +; RV32ZBB-NEXT: sll a1, a1, a6 +; RV32ZBB-NEXT: xori a4, t0, 31 +; RV32ZBB-NEXT: srli a5, a0, 1 +; RV32ZBB-NEXT: srl a4, a5, a4 +; RV32ZBB-NEXT: or a1, a1, a4 ; RV32ZBB-NEXT: .LBB13_6: -; RV32ZBB-NEXT: srl a1, a1, a2 -; RV32ZBB-NEXT: or a4, a4, a1 -; RV32ZBB-NEXT: 
.LBB13_7: -; RV32ZBB-NEXT: mv a1, a4 +; RV32ZBB-NEXT: sll a0, a0, a6 +; RV32ZBB-NEXT: slti a4, a7, 0 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: and a0, a4, a0 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_64_mask: @@ -843,42 +849,43 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64_mask_and_127_and_63: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a4, a2, 127 -; RV32I-NEXT: addi a5, a4, -32 -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: bltz a5, .LBB14_2 +; RV32I-NEXT: srl a4, a1, a2 +; RV32I-NEXT: andi a3, a2, 127 +; RV32I-NEXT: addi a6, a3, -32 +; RV32I-NEXT: slti a5, a6, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: bltz a6, .LBB14_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a1, a5 +; RV32I-NEXT: srl a3, a1, a6 ; RV32I-NEXT: j .LBB14_3 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: srl a0, a3, a2 -; RV32I-NEXT: slli a6, a1, 1 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: sll a4, a6, a4 -; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srl a6, a0, a2 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: xori a3, a3, 31 +; RV32I-NEXT: sll a3, a7, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: .LBB14_3: ; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi a4, a6, 63 -; RV32I-NEXT: addi a7, a4, -32 +; RV32I-NEXT: andi t0, a6, 63 +; RV32I-NEXT: addi a7, t0, -32 +; RV32I-NEXT: and a2, a5, a4 ; RV32I-NEXT: bltz a7, .LBB14_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a4, a3, a7 -; RV32I-NEXT: bltz a5, .LBB14_6 -; RV32I-NEXT: j .LBB14_7 +; RV32I-NEXT: sll a1, a0, a7 +; RV32I-NEXT: j .LBB14_6 ; RV32I-NEXT: .LBB14_5: -; RV32I-NEXT: sll a7, a1, a6 -; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: srl a4, t0, a4 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: bgez a5, .LBB14_7 +; RV32I-NEXT: sll a1, a1, a6 +; RV32I-NEXT: xori a4, t0, 31 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: .LBB14_6: -; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a4, a4, a1 -; RV32I-NEXT: .LBB14_7: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: sll a0, a0, a6 +; RV32I-NEXT: slti a4, a7, 0 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64_mask_and_127_and_63: @@ -891,42 +898,43 @@ ; ; RV32ZBB-LABEL: rotr_64_mask_and_127_and_63: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a4, a2, 127 -; RV32ZBB-NEXT: addi a5, a4, -32 -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: bltz a5, .LBB14_2 +; RV32ZBB-NEXT: srl a4, a1, a2 +; RV32ZBB-NEXT: andi a3, a2, 127 +; RV32ZBB-NEXT: addi a6, a3, -32 +; RV32ZBB-NEXT: slti a5, a6, 0 +; RV32ZBB-NEXT: neg a5, a5 +; RV32ZBB-NEXT: bltz a6, .LBB14_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: srl a0, a1, a5 +; RV32ZBB-NEXT: srl a3, a1, a6 ; RV32ZBB-NEXT: j .LBB14_3 ; RV32ZBB-NEXT: .LBB14_2: -; RV32ZBB-NEXT: srl a0, a3, a2 -; RV32ZBB-NEXT: slli a6, a1, 1 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: sll a4, a6, a4 -; RV32ZBB-NEXT: or a0, a0, a4 +; RV32ZBB-NEXT: srl a6, a0, a2 +; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: xori a3, a3, 31 +; RV32ZBB-NEXT: sll a3, a7, a3 +; RV32ZBB-NEXT: or a3, a6, a3 ; RV32ZBB-NEXT: .LBB14_3: ; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi a4, a6, 63 -; RV32ZBB-NEXT: addi a7, a4, -32 +; RV32ZBB-NEXT: andi t0, a6, 63 +; RV32ZBB-NEXT: addi a7, t0, -32 +; RV32ZBB-NEXT: and a2, a5, a4 ; RV32ZBB-NEXT: bltz a7, .LBB14_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a4, a3, a7 -; 
RV32ZBB-NEXT: bltz a5, .LBB14_6 -; RV32ZBB-NEXT: j .LBB14_7 +; RV32ZBB-NEXT: sll a1, a0, a7 +; RV32ZBB-NEXT: j .LBB14_6 ; RV32ZBB-NEXT: .LBB14_5: -; RV32ZBB-NEXT: sll a7, a1, a6 -; RV32ZBB-NEXT: xori a4, a4, 31 -; RV32ZBB-NEXT: srli t0, a3, 1 -; RV32ZBB-NEXT: srl a4, t0, a4 -; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: sll a3, a3, a6 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: bgez a5, .LBB14_7 +; RV32ZBB-NEXT: sll a1, a1, a6 +; RV32ZBB-NEXT: xori a4, t0, 31 +; RV32ZBB-NEXT: srli a5, a0, 1 +; RV32ZBB-NEXT: srl a4, a5, a4 +; RV32ZBB-NEXT: or a1, a1, a4 ; RV32ZBB-NEXT: .LBB14_6: -; RV32ZBB-NEXT: srl a1, a1, a2 -; RV32ZBB-NEXT: or a4, a4, a1 -; RV32ZBB-NEXT: .LBB14_7: -; RV32ZBB-NEXT: mv a1, a4 +; RV32ZBB-NEXT: sll a0, a0, a6 +; RV32ZBB-NEXT: slti a4, a7, 0 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: and a0, a4, a0 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_64_mask_and_127_and_63: @@ -1054,17 +1062,19 @@ ; RV32I-NEXT: bltz a0, .LBB17_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: sll a3, a2, a0 -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB17_7 ; RV32I-NEXT: .LBB17_6: -; RV32I-NEXT: sll a0, a3, a4 -; RV32I-NEXT: srli a3, a2, 1 +; RV32I-NEXT: sll a3, a3, a4 +; RV32I-NEXT: srli a7, a2, 1 ; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: srl a3, a3, a6 -; RV32I-NEXT: or a3, a0, a3 -; RV32I-NEXT: sll a0, a2, a4 -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srl a6, a7, a6 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB17_7: +; RV32I-NEXT: sll a2, a2, a4 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: sltu a1, a0, a1 ; RV32I-NEXT: add a2, a5, a3 ; RV32I-NEXT: add a1, a2, a1 @@ -1107,17 +1117,19 @@ ; RV32ZBB-NEXT: bltz a0, .LBB17_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: sll a3, a2, a0 -; RV32ZBB-NEXT: mv a0, a1 ; RV32ZBB-NEXT: j .LBB17_7 ; RV32ZBB-NEXT: .LBB17_6: -; RV32ZBB-NEXT: sll a0, a3, a4 -; RV32ZBB-NEXT: srli a3, a2, 1 +; RV32ZBB-NEXT: sll a3, a3, a4 +; RV32ZBB-NEXT: srli a7, a2, 1 ; RV32ZBB-NEXT: xori a6, a6, 31 -; RV32ZBB-NEXT: srl a3, a3, a6 -; RV32ZBB-NEXT: or a3, a0, a3 -; RV32ZBB-NEXT: sll a0, a2, a4 -; RV32ZBB-NEXT: add a0, a1, a0 +; RV32ZBB-NEXT: srl a6, a7, a6 +; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB17_7: +; RV32ZBB-NEXT: sll a2, a2, a4 +; RV32ZBB-NEXT: slti a0, a0, 0 +; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: add a0, a1, a0 ; RV32ZBB-NEXT: sltu a1, a0, a1 ; RV32ZBB-NEXT: add a2, a5, a3 ; RV32ZBB-NEXT: add a1, a2, a1 @@ -1206,17 +1218,19 @@ ; RV32I-NEXT: bltz a0, .LBB19_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: sll a3, a2, a0 -; RV32I-NEXT: mv a0, a6 ; RV32I-NEXT: j .LBB19_7 ; RV32I-NEXT: .LBB19_6: -; RV32I-NEXT: sll a0, a3, a4 -; RV32I-NEXT: srli a3, a2, 1 +; RV32I-NEXT: sll a3, a3, a4 +; RV32I-NEXT: srli a7, a2, 1 ; RV32I-NEXT: xori a5, a5, 31 -; RV32I-NEXT: srl a3, a3, a5 -; RV32I-NEXT: or a3, a0, a3 -; RV32I-NEXT: sll a0, a2, a4 -; RV32I-NEXT: add a0, a6, a0 +; RV32I-NEXT: srl a5, a7, a5 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: .LBB19_7: +; RV32I-NEXT: sll a2, a2, a4 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a6, a0 ; RV32I-NEXT: sltu a2, a0, a6 ; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: add a1, a1, a2 @@ -1258,17 +1272,19 @@ ; RV32ZBB-NEXT: bltz a0, .LBB19_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: sll a3, a2, a0 -; RV32ZBB-NEXT: mv a0, a6 ; RV32ZBB-NEXT: j .LBB19_7 ; RV32ZBB-NEXT: .LBB19_6: -; RV32ZBB-NEXT: sll a0, a3, a4 -; 
RV32ZBB-NEXT: srli a3, a2, 1 +; RV32ZBB-NEXT: sll a3, a3, a4 +; RV32ZBB-NEXT: srli a7, a2, 1 ; RV32ZBB-NEXT: xori a5, a5, 31 -; RV32ZBB-NEXT: srl a3, a3, a5 -; RV32ZBB-NEXT: or a3, a0, a3 -; RV32ZBB-NEXT: sll a0, a2, a4 -; RV32ZBB-NEXT: add a0, a6, a0 +; RV32ZBB-NEXT: srl a5, a7, a5 +; RV32ZBB-NEXT: or a3, a3, a5 ; RV32ZBB-NEXT: .LBB19_7: +; RV32ZBB-NEXT: sll a2, a2, a4 +; RV32ZBB-NEXT: slti a0, a0, 0 +; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: add a0, a6, a0 ; RV32ZBB-NEXT: sltu a2, a0, a6 ; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: add a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll @@ -296,33 +296,20 @@ } define i64 @not_shl_one_i64(i64 %x) { -; RV32I-LABEL: not_shl_one_i64: -; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: sll a2, a1, a0 -; RV32I-NEXT: addi a0, a0, -32 -; RV32I-NEXT: sll a1, a1, a0 -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: not a1, a1 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: ret -; -; RV32ZBB-ZBKB-LABEL: not_shl_one_i64: -; RV32ZBB-ZBKB: # %bb.0: -; RV32ZBB-ZBKB-NEXT: addi a1, a0, -32 -; RV32ZBB-ZBKB-NEXT: li a2, -2 -; RV32ZBB-ZBKB-NEXT: rol a3, a2, a1 -; RV32ZBB-ZBKB-NEXT: slti a4, a1, 0 -; RV32ZBB-ZBKB-NEXT: neg a1, a4 -; RV32ZBB-ZBKB-NEXT: or a1, a1, a3 -; RV32ZBB-ZBKB-NEXT: rol a0, a2, a0 -; RV32ZBB-ZBKB-NEXT: addi a2, a4, -1 -; RV32ZBB-ZBKB-NEXT: or a0, a2, a0 -; RV32ZBB-ZBKB-NEXT: ret +; CHECK-LABEL: not_shl_one_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: sll a2, a1, a0 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: slti a3, a0, 0 +; CHECK-NEXT: neg a4, a3 +; CHECK-NEXT: and a2, a4, a2 +; CHECK-NEXT: sll a0, a1, a0 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: and a1, a1, a0 +; CHECK-NEXT: not a0, a2 +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: ret %1 = shl i64 1, %x %2 = xor i64 %1, -1 ret i64 %2 diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -46,31 +46,37 @@ define i64 @bclr_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: bclr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a3, a2, 63 -; RV32I-NEXT: addi a4, a3, -32 ; RV32I-NEXT: li a3, 1 -; RV32I-NEXT: bltz a4, .LBB2_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a2, a3, a4 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: sll a4, a3, a2 +; RV32I-NEXT: andi a2, a2, 63 +; RV32I-NEXT: addi a2, a2, -32 +; RV32I-NEXT: slti a5, a2, 0 +; RV32I-NEXT: neg a6, a5 +; RV32I-NEXT: and a4, a6, a4 ; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: addi a3, a5, -1 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: not a3, a4 ; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: and a0, a3, a0 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBS-LABEL: bclr_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: andi a3, a2, 63 ; RV32ZBS-NEXT: addi a3, a3, -32 -; RV32ZBS-NEXT: bltz a3, .LBB2_2 -; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: bclr a1, a1, a3 -; RV32ZBS-NEXT: ret -; RV32ZBS-NEXT: .LBB2_2: -; RV32ZBS-NEXT: bclr a0, a0, a2 +; RV32ZBS-NEXT: slti a4, a3, 0 +; RV32ZBS-NEXT: neg a5, a4 +; RV32ZBS-NEXT: bset a2, zero, a2 +; RV32ZBS-NEXT: and a2, a5, a2 +; RV32ZBS-NEXT: bset a3, zero, a3 +; RV32ZBS-NEXT: addi a4, a4, -1 +; 
RV32ZBS-NEXT: and a3, a4, a3 +; RV32ZBS-NEXT: not a3, a3 +; RV32ZBS-NEXT: not a2, a2 +; RV32ZBS-NEXT: and a0, a2, a0 +; RV32ZBS-NEXT: and a1, a3, a1 ; RV32ZBS-NEXT: ret %and = and i64 %b, 63 %shl = shl nuw i64 1, %and @@ -163,29 +169,27 @@ define signext i64 @bset_i64_zero(i64 signext %a) nounwind { ; RV32I-LABEL: bset_i64_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a1, a0, -32 -; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: bltz a1, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: sll a1, a2, a1 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sll a0, a2, a0 +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: sll a2, a1, a0 +; RV32I-NEXT: addi a3, a0, -32 +; RV32I-NEXT: slti a4, a3, 0 +; RV32I-NEXT: neg a0, a4 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: sll a1, a1, a3 +; RV32I-NEXT: addi a2, a4, -1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBS-LABEL: bset_i64_zero: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: addi a1, a0, -32 -; RV32ZBS-NEXT: bltz a1, .LBB7_2 -; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: li a0, 0 -; RV32ZBS-NEXT: bset a1, zero, a1 -; RV32ZBS-NEXT: ret -; RV32ZBS-NEXT: .LBB7_2: -; RV32ZBS-NEXT: li a1, 0 +; RV32ZBS-NEXT: slti a2, a1, 0 +; RV32ZBS-NEXT: neg a3, a2 ; RV32ZBS-NEXT: bset a0, zero, a0 +; RV32ZBS-NEXT: and a0, a3, a0 +; RV32ZBS-NEXT: bset a1, zero, a1 +; RV32ZBS-NEXT: addi a2, a2, -1 +; RV32ZBS-NEXT: and a1, a2, a1 ; RV32ZBS-NEXT: ret %shl = shl i64 1, %a ret i64 %shl diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -479,19 +479,17 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ffs_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: li s0, 0 +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s1, a0 +; RV64I-NEXT: and a0, s0, a0 ; RV64I-NEXT: lui a1, 30667 ; RV64I-NEXT: addiw a1, a1, 1329 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: li a1, 32 -; RV64I-NEXT: beqz s1, .LBB9_2 +; RV64I-NEXT: beqz s0, .LBB9_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI9_0) @@ -499,26 +497,22 @@ ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a1, 0(a0) ; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: beqz s1, .LBB9_4 -; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: addi s0, a1, 1 -; RV64I-NEXT: .LBB9_4: -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: addi a0, a1, 1 +; RV64I-NEXT: seqz a1, s0 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ffs_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: mv a1, a0 -; RV64ZBB-NEXT: li a0, 0 -; RV64ZBB-NEXT: beqz a1, .LBB9_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: ctzw a0, a1 -; RV64ZBB-NEXT: addi a0, a0, 1 -; RV64ZBB-NEXT: .LBB9_2: +; RV64ZBB-NEXT: ctzw a1, a0 +; RV64ZBB-NEXT: addi a1, a1, 1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; 
RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) %2 = add i32 %1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -737,67 +737,59 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v2, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr 
a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -809,37 +801,34 @@ ; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fpext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fpext-vp.ll @@ -95,23 +95,21 @@ ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v8, 16 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfwcvt.f.f.v v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB7_4: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -102,21 +102,19 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # 
Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB7_4: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -61,21 +61,19 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -64 ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: bltu a0, a2, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: addi a1, a0, -64 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB4_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 64 -; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 @@ -257,217 +255,219 @@ ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v3, v0, 8 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v2, v0, 4 +; CHECK-NEXT: vslidedown.vi v27, v3, 4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v27, 2 +; CHECK-NEXT: addi a2, a1, 512 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: addi a3, a1, 640 +; CHECK-NEXT: vle64.v v8, (a3) ; CHECK-NEXT: addi a3, a7, -64 -; CHECK-NEXT: vslidedown.vi v2, v0, 8 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a7, a3, .LBB16_2 +; CHECK-NEXT: sltu a4, a7, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: addi a3, a4, -32 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a3, a5, a3 +; CHECK-NEXT: addi a5, a3, -16 +; CHECK-NEXT: sltu a6, a3, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a2, a2, a6 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: bltu a3, a2, .LBB16_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v3, v2, 4 -; CHECK-NEXT: addi a6, a4, -32 -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: mv a5, a2 -; CHECK-NEXT: bltu a4, a6, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: .LBB16_4: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v3, 2 +; CHECK-NEXT: vslidedown.vi v4, v2, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a3) -; CHECK-NEXT: addi t0, a5, -16 -; CHECK-NEXT: addi a6, a1, 512 -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: bltu a5, t0, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a3, t0 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vle64.v v8, (a5) ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: li a3, 64 +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a7, a3, .LBB16_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a7, 64 +; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a5, a7, -32 +; CHECK-NEXT: sltu a6, a7, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: addi a6, a5, -16 +; CHECK-NEXT: sltu t0, a5, a6 +; CHECK-NEXT: addi t0, t0, -1 +; CHECK-NEXT: and a6, t0, a6 +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t ; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 4 +; CHECK-NEXT: slli a6, a6, 3 ; CHECK-NEXT: add a6, sp, a6 ; CHECK-NEXT: addi a6, a6, 16 ; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a5, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: bltu a5, a2, .LBB16_6 +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a5, 16 -; CHECK-NEXT: .LBB16_8: +; CHECK-NEXT: .LBB16_6: +; CHECK-NEXT: addi a6, a1, 384 +; CHECK-NEXT: addi a1, a1, 256 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: li t0, 48 -; CHECK-NEXT: mul a6, a6, t0 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a7, a5, .LBB16_10 +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li t0, 40 +; CHECK-NEXT: mul a5, a5, t0 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a3, .LBB16_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: .LBB16_8: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v4, v3, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v 
v16, (a6) +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: bltu a4, a2, .LBB16_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a7, 64 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v3, v1, 4 -; CHECK-NEXT: addi t0, a7, -32 -; CHECK-NEXT: addi a5, a1, 128 -; CHECK-NEXT: mv a6, a2 -; CHECK-NEXT: bltu a7, t0, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: mv a6, t0 -; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v3, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) -; CHECK-NEXT: addi a5, a6, -16 -; CHECK-NEXT: mv t0, a2 -; CHECK-NEXT: bltu a6, a5, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: mv t0, a5 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v24, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a6, a3, .LBB16_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: .LBB16_16: -; CHECK-NEXT: addi t0, a1, 384 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v2, v1, 2 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: li t1, 40 -; CHECK-NEXT: mul a6, a6, t1 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a5, .LBB16_18 -; CHECK-NEXT: # %bb.17: -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: .LBB16_18: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v2, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (t0) -; CHECK-NEXT: addi t0, a4, -16 -; CHECK-NEXT: addi a6, a1, 256 -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: bltu a4, t0, .LBB16_20 -; CHECK-NEXT: # %bb.19: -; CHECK-NEXT: mv a1, t0 -; CHECK-NEXT: .LBB16_20: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: addi a1, a4, -16 +; CHECK-NEXT: sltu a4, a4, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a3, .LBB16_22 -; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB16_22: -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t -; CHECK-NEXT: bltu a7, a5, .LBB16_24 -; CHECK-NEXT: # %bb.23: +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a7, a3, .LBB16_12 +; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: li a7, 32 -; CHECK-NEXT: .LBB16_24: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a1, a7, -16 -; CHECK-NEXT: vslidedown.vi v0, v1, 2 -; CHECK-NEXT: bltu a7, a1, .LBB16_26 -; CHECK-NEXT: # %bb.25: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB16_26: -; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, ma +; CHECK-NEXT: .LBB16_12: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 48 ; 
CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: addi a1, a7, -16 +; CHECK-NEXT: sltu a4, a7, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a7, a3, .LBB16_28 -; CHECK-NEXT: # %bb.27: +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a7, a2, .LBB16_14 +; CHECK-NEXT: # %bb.13: ; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_28: +; CHECK-NEXT: .LBB16_14: ; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, ma +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, 
a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v24, v16, 16 +; CHECK-NEXT: vse32.v v24, (a0) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a3, 40 @@ -507,21 +507,19 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB17_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB17_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -666,62 +666,62 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi 
a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -734,69 +734,58 @@ define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, 
v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -666,62 +666,62 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; 
CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -734,69 +734,58 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -192,30 +192,30 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: lui a0, %hi(.LCPI10_0) ; RV32-NEXT: fld ft0, %lo(.LCPI10_0)(a0) ; RV32-NEXT: lui a0, %hi(.LCPI10_1) ; RV32-NEXT: fld ft1, %lo(.LCPI10_1)(a0) +; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: vfmv.f.s ft2, v9 -; RV32-NEXT: feq.d a0, ft2, ft2 -; RV32-NEXT: beqz a0, .LBB10_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB10_2: +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz +; RV32-NEXT: feq.d a2, ft2, ft2 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s ft2, v8 -; RV32-NEXT: feq.d a0, ft2, ft2 -; RV32-NEXT: beqz a0, .LBB10_4 -; RV32-NEXT: # %bb.3: ; RV32-NEXT: fmax.d ft0, ft2, ft0 ; RV32-NEXT: fmin.d ft0, ft0, ft1 ; RV32-NEXT: fcvt.w.d 
a0, ft0, rtz -; RV32-NEXT: .LBB10_4: +; RV32-NEXT: feq.d a2, ft2, ft2 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; RV32-NEXT: vmv.s.x v9, a0 ; RV32-NEXT: vse8.v v9, (a1) @@ -226,30 +226,30 @@ ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 ; RV64-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-NEXT: fld ft0, %lo(.LCPI10_0)(a0) ; RV64-NEXT: lui a0, %hi(.LCPI10_1) ; RV64-NEXT: fld ft1, %lo(.LCPI10_1)(a0) +; RV64-NEXT: vslidedown.vi v9, v8, 1 ; RV64-NEXT: vfmv.f.s ft2, v9 -; RV64-NEXT: feq.d a0, ft2, ft2 -; RV64-NEXT: beqz a0, .LBB10_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB10_2: +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz +; RV64-NEXT: feq.d a2, ft2, ft2 +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfmv.f.s ft2, v8 -; RV64-NEXT: feq.d a0, ft2, ft2 -; RV64-NEXT: beqz a0, .LBB10_4 -; RV64-NEXT: # %bb.3: ; RV64-NEXT: fmax.d ft0, ft2, ft0 ; RV64-NEXT: fmin.d ft0, ft0, ft1 ; RV64-NEXT: fcvt.l.d a0, ft0, rtz -; RV64-NEXT: .LBB10_4: +; RV64-NEXT: feq.d a2, ft2, ft2 +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vse8.v v9, (a1) @@ -329,102 +329,84 @@ ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: fld ft1, %lo(.LCPI12_1)(a0) ; RV32-NEXT: vfmv.f.s ft2, v8 -; RV32-NEXT: feq.d a0, ft2, ft2 -; RV32-NEXT: beqz a0, .LBB12_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_2: +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz +; RV32-NEXT: feq.d a2, ft2, ft2 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: sb a0, 8(sp) ; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vslidedown.vi v12, v8, 7 ; RV32-NEXT: vfmv.f.s ft2, v12 -; RV32-NEXT: feq.d a0, ft2, ft2 -; RV32-NEXT: beqz a0, .LBB12_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_4: +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz +; RV32-NEXT: feq.d a2, ft2, ft2 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 15(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 6 ; RV32-NEXT: vfmv.f.s ft2, v12 +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz ; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: bnez a2, .LBB12_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_7 -; RV32-NEXT: .LBB12_6: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_7: +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 14(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 5 ; RV32-NEXT: vfmv.f.s ft2, 
v12 +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz ; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: bnez a2, .LBB12_9 -; RV32-NEXT: # %bb.8: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_10 -; RV32-NEXT: .LBB12_9: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_10: +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 4 ; RV32-NEXT: vfmv.f.s ft2, v12 +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz ; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: bnez a2, .LBB12_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_13 -; RV32-NEXT: .LBB12_12: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_13: +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 3 ; RV32-NEXT: vfmv.f.s ft2, v12 +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz ; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: bnez a2, .LBB12_15 -; RV32-NEXT: # %bb.14: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_16 -; RV32-NEXT: .LBB12_15: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_16: +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 11(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 2 ; RV32-NEXT: vfmv.f.s ft2, v12 +; RV32-NEXT: fmax.d ft3, ft2, ft0 +; RV32-NEXT: fmin.d ft3, ft3, ft1 +; RV32-NEXT: fcvt.w.d a0, ft3, rtz ; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: bnez a2, .LBB12_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_19 -; RV32-NEXT: .LBB12_18: -; RV32-NEXT: fmax.d ft2, ft2, ft0 -; RV32-NEXT: fmin.d ft2, ft2, ft1 -; RV32-NEXT: fcvt.w.d a0, ft2, rtz -; RV32-NEXT: .LBB12_19: +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 10(sp) ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s ft2, v8 -; RV32-NEXT: feq.d a2, ft2, ft2 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: bnez a2, .LBB12_21 -; RV32-NEXT: # %bb.20: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB12_22 -; RV32-NEXT: .LBB12_21: ; RV32-NEXT: fmax.d ft0, ft2, ft0 ; RV32-NEXT: fmin.d ft0, ft0, ft1 ; RV32-NEXT: fcvt.w.d a0, ft0, rtz -; RV32-NEXT: .LBB12_22: +; RV32-NEXT: feq.d a2, ft2, ft2 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: sb a0, 9(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma @@ -444,102 +426,84 @@ ; RV64-NEXT: lui a0, %hi(.LCPI12_1) ; RV64-NEXT: fld ft1, %lo(.LCPI12_1)(a0) ; RV64-NEXT: vfmv.f.s ft2, v8 -; RV64-NEXT: feq.d a0, ft2, ft2 -; RV64-NEXT: beqz a0, .LBB12_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_2: +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz +; RV64-NEXT: feq.d a2, ft2, ft2 +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 ; 
RV64-NEXT: sb a0, 8(sp) ; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 7 ; RV64-NEXT: vfmv.f.s ft2, v12 -; RV64-NEXT: feq.d a0, ft2, ft2 -; RV64-NEXT: beqz a0, .LBB12_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_4: +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz +; RV64-NEXT: feq.d a2, ft2, ft2 +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 15(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 6 ; RV64-NEXT: vfmv.f.s ft2, v12 +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz ; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: bnez a2, .LBB12_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_7 -; RV64-NEXT: .LBB12_6: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_7: +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 14(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 5 ; RV64-NEXT: vfmv.f.s ft2, v12 +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz ; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: bnez a2, .LBB12_9 -; RV64-NEXT: # %bb.8: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_10 -; RV64-NEXT: .LBB12_9: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_10: +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 4 ; RV64-NEXT: vfmv.f.s ft2, v12 +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz ; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: bnez a2, .LBB12_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_13 -; RV64-NEXT: .LBB12_12: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_13: +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 3 ; RV64-NEXT: vfmv.f.s ft2, v12 +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz ; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: bnez a2, .LBB12_15 -; RV64-NEXT: # %bb.14: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_16 -; RV64-NEXT: .LBB12_15: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_16: +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 11(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 2 ; RV64-NEXT: vfmv.f.s ft2, v12 +; RV64-NEXT: fmax.d ft3, ft2, ft0 +; RV64-NEXT: fmin.d ft3, ft3, ft1 +; RV64-NEXT: fcvt.l.d a0, ft3, rtz ; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: bnez a2, .LBB12_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_19 -; RV64-NEXT: .LBB12_18: -; RV64-NEXT: fmax.d ft2, ft2, ft0 -; RV64-NEXT: fmin.d ft2, ft2, ft1 -; RV64-NEXT: fcvt.l.d a0, ft2, rtz -; RV64-NEXT: .LBB12_19: +; RV64-NEXT: 
seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: sb a0, 10(sp) ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: vfmv.f.s ft2, v8 -; RV64-NEXT: feq.d a2, ft2, ft2 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: bnez a2, .LBB12_21 -; RV64-NEXT: # %bb.20: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB12_22 -; RV64-NEXT: .LBB12_21: ; RV64-NEXT: fmax.d ft0, ft2, ft0 ; RV64-NEXT: fmin.d ft0, ft0, ft1 ; RV64-NEXT: fcvt.l.d a0, ft0, rtz -; RV64-NEXT: .LBB12_22: +; RV64-NEXT: feq.d a2, ft2, ft2 +; RV64-NEXT: seqz a2, a2 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 ; RV64-NEXT: sb a0, 9(sp) ; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -317,21 +317,19 @@ ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t @@ -343,21 +341,20 @@ define <32 x i64> @vfptosi_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -317,21 +317,19 @@ ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, 
e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t @@ -343,21 +341,20 @@ define <32 x i64> @vfptoui_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -121,27 +121,26 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -32 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a0, a2, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vslidedown.vi v24, v0, 4 -; CHECK-NEXT: bltu a0, a2, .LBB8_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: .LBB8_4: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: addi a1, a0, -32 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; 
CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -153,27 +152,26 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -32 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a0, a2, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vslidedown.vi v24, v0, 4 -; CHECK-NEXT: bltu a0, a2, .LBB9_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: addi a1, a0, -32 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -845,27 +845,26 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a3, a1, -32 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a1, a3, .LBB49_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB49_2: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vslidedown.vi v24, v0, 4 -; CHECK-NEXT: bltu a1, a3, .LBB49_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB49_4: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB49_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: .LBB49_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma +; CHECK-NEXT: addi a0, a1, -32 +; CHECK-NEXT: sltu a1, a1, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t ; CHECK-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -251,30 +251,28 @@ define signext i1 @vpreduce_and_v256i1(i1 signext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v256i1: ; CHECK: # 
%bb.0: -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a1, a2, .LBB14_2 +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB14_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB14_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmnot.m v8, v8 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vcpop.m a2, v8, v0.t -; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmnot.m v11, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vcpop.m a2, v11, v0.t ; CHECK-NEXT: seqz a2, a2 -; CHECK-NEXT: bltu a1, a3, .LBB14_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB14_4: +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmnot.m v8, v11 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vcpop.m a1, v8, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -666,62 +666,62 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded 
Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -734,69 +734,58 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli 
a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -666,62 +666,62 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 
-; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -734,69 +734,58 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v 
v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -666,62 +666,62 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 1 ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -734,69 +734,58 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -546,41 +546,48 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: addi a4, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v24, (a4) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v8, 
(a0) ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: addi a4, a2, -64 +; CHECK-NEXT: addi a0, a2, -64 +; CHECK-NEXT: sltu a3, a2, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: bltu a2, a4, .LBB43_2 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB43_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB43_2: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB43_4 -; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: .LBB43_4: +; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vsetivli zero, 16, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v16, v2, 8 +; CHECK-NEXT: vslideup.vi v16, v1, 8 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1149,62 +1156,48 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v2, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB87_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB87_2: -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB87_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a0, .LBB87_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 16 
-; CHECK-NEXT: .LBB87_4: +; CHECK-NEXT: .LBB87_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vi v16, v1, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -631,59 +631,46 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: li a4, 128 -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: bltu a3, a4, .LBB51_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vlm.v v24, (a2) -; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: addi a0, a3, -128 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v1, v8, v16, v0.t -; CHECK-NEXT: bltu a3, a0, .LBB51_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a0 -; CHECK-NEXT: .LBB51_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vlm.v v0, (a2) +; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: vle8.v v8, (a2) +; CHECK-NEXT: addi a2, a3, -128 +; CHECK-NEXT: sltu a4, a3, a2 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a0, a4, a2 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t +; CHECK-NEXT: bltu a3, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; 
CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -694,23 +681,20 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v256i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB52_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB52_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB52_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB52_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB52_4: +; CHECK-NEXT: .LBB52_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t @@ -726,23 +710,20 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v256i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB53_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB53_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB53_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB53_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB53_4: +; CHECK-NEXT: .LBB53_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t @@ -1338,41 +1319,48 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a4, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a4) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # 
Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: addi a4, a2, -32 +; CHECK-NEXT: addi a0, a2, -32 +; CHECK-NEXT: sltu a3, a2, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: bltu a2, a4, .LBB99_2 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB99_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB99_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB99_4 -; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: .LBB99_4: +; CHECK-NEXT: .LBB99_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v16, v2, 4 +; CHECK-NEXT: vslideup.vi v16, v1, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1384,21 +1372,19 @@ ; CHECK-LABEL: icmp_eq_vx_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: addi a3, a1, -32 ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: bltu a1, a3, .LBB100_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB100_2: +; CHECK-NEXT: addi a2, a1, -32 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB100_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB100_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB100_4: +; CHECK-NEXT: .LBB100_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t @@ -1416,21 +1402,19 @@ ; CHECK-LABEL: icmp_eq_vx_swap_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: addi a3, a1, -32 ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: bltu a1, a3, .LBB101_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB101_2: +; CHECK-NEXT: addi a2, a1, -32 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB101_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB101_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB101_4: +; CHECK-NEXT: .LBB101_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -154,23 +154,21 @@ ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v8, 16 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vsext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vsext.vf2 v24, v8, v0.t @@ -183,21 +181,19 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a0, a2, .LBB13_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v8, 16 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vsext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB13_4: +; CHECK-NEXT: .LBB13_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8 ; CHECK-NEXT: vmv.v.v v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -309,21 +309,19 @@ ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -335,21 +333,20 @@ define <32 x double> @vsitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; 
CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.x.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.x.v v16, v16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -605,50 +605,48 @@ define <32 x double> @strided_vpload_v32f64(double* %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind { ; CHECK-RV32-LABEL: strided_vpload_v32f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: addi a4, a2, -16 +; CHECK-RV32-NEXT: li a4, 16 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: li a3, 0 +; CHECK-RV32-NEXT: mv a3, a2 ; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a3, a4 +; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB33_2: -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: li a2, 16 -; CHECK-RV32-NEXT: .LBB33_4: -; CHECK-RV32-NEXT: mul a4, a2, a1 +; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 +; CHECK-RV32-NEXT: addi a5, a2, -16 +; CHECK-RV32-NEXT: sltu a2, a2, a5 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_vpload_v32f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: addi a4, a2, -16 +; CHECK-RV64-NEXT: li a4, 16 ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: li a3, 0 +; CHECK-RV64-NEXT: mv a3, a2 ; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a4 +; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB33_2: -; CHECK-RV64-NEXT: li a4, 16 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: li a2, 16 -; CHECK-RV64-NEXT: .LBB33_4: -; CHECK-RV64-NEXT: mul a4, a2, a1 +; CHECK-RV64-NEXT: mul a4, a3, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 +; CHECK-RV64-NEXT: addi a5, a2, -16 +; CHECK-RV64-NEXT: sltu a2, a2, a5 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret @@ -659,43 +657,41 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) nounwind { ; CHECK-RV32-LABEL: strided_vpload_v32f64_allones_mask: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: addi a4, a2, -16 -; CHECK-RV32-NEXT: li a3, 0 +; CHECK-RV32-NEXT: li a4, 16 +; CHECK-RV32-NEXT: mv a3, a2 ; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a3, a4 +; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB34_2: -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: li a2, 16 -; CHECK-RV32-NEXT: .LBB34_4: -; CHECK-RV32-NEXT: mul a4, a2, a1 +; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 -; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1 +; CHECK-RV32-NEXT: addi a5, a2, -16 +; CHECK-RV32-NEXT: sltu a2, a2, a5 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a5 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1 +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_vpload_v32f64_allones_mask: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: addi a4, a2, -16 -; CHECK-RV64-NEXT: li a3, 0 +; CHECK-RV64-NEXT: li a4, 16 +; CHECK-RV64-NEXT: mv a3, a2 ; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a4 +; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB34_2: -; CHECK-RV64-NEXT: li a4, 16 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: li a2, 16 -; CHECK-RV64-NEXT: .LBB34_4: -; CHECK-RV64-NEXT: mul a4, a2, a1 +; CHECK-RV64-NEXT: mul a4, a3, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1 +; CHECK-RV64-NEXT: addi a5, a2, -16 +; CHECK-RV64-NEXT: sltu a2, a2, a5 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a5 ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1 +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1 ; CHECK-RV64-NEXT: ret %one = insertelement <32 x i1> poison, i1 true, i32 0 @@ -717,51 +713,46 @@ ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: .LBB35_2: -; CHECK-RV32-NEXT: addi a5, a3, -16 -; CHECK-RV32-NEXT: li a7, 0 -; CHECK-RV32-NEXT: bltu a3, a5, .LBB35_4 +; CHECK-RV32-NEXT: mul a5, a3, a2 +; CHECK-RV32-NEXT: addi a6, a4, -32 +; CHECK-RV32-NEXT: sltu a4, a4, a6 +; CHECK-RV32-NEXT: addi a4, a4, -1 +; CHECK-RV32-NEXT: and a6, a4, a6 +; CHECK-RV32-NEXT: li a4, 16 +; CHECK-RV32-NEXT: add a5, a1, a5 +; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a7, a5 -; CHECK-RV32-NEXT: .LBB35_4: ; CHECK-RV32-NEXT: li a6, 16 -; CHECK-RV32-NEXT: mv a5, a3 -; CHECK-RV32-NEXT: bltu a3, a6, .LBB35_6 +; CHECK-RV32-NEXT: .LBB35_4: +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, 
ma +; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t +; CHECK-RV32-NEXT: addi a5, a3, -16 +; CHECK-RV32-NEXT: sltu a6, a3, a5 +; CHECK-RV32-NEXT: addi a6, a6, -1 +; CHECK-RV32-NEXT: and a5, a6, a5 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: li a5, 16 +; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB35_6: -; CHECK-RV32-NEXT: mul t0, a5, a2 -; CHECK-RV32-NEXT: add t0, a1, t0 +; CHECK-RV32-NEXT: mul a4, a3, a2 +; CHECK-RV32-NEXT: add a4, a1, a4 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (t0), a2, v0.t -; CHECK-RV32-NEXT: addi t0, a4, -32 -; CHECK-RV32-NEXT: li a7, 0 -; CHECK-RV32-NEXT: bltu a4, t0, .LBB35_8 -; CHECK-RV32-NEXT: # %bb.7: -; CHECK-RV32-NEXT: mv a7, t0 -; CHECK-RV32-NEXT: .LBB35_8: -; CHECK-RV32-NEXT: bltu a7, a6, .LBB35_10 -; CHECK-RV32-NEXT: # %bb.9: -; CHECK-RV32-NEXT: li a7, 16 -; CHECK-RV32-NEXT: .LBB35_10: -; CHECK-RV32-NEXT: mul a3, a3, a2 -; CHECK-RV32-NEXT: add a3, a1, a3 -; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a3), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vse64.v v8, (a0) ; CHECK-RV32-NEXT: addi a1, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v24, (a1) +; CHECK-RV32-NEXT: vse64.v v16, (a1) ; CHECK-RV32-NEXT: addi a0, a0, 128 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v16, (a0) +; CHECK-RV32-NEXT: vse64.v v24, (a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: @@ -773,51 +764,46 @@ ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: li a4, 32 ; CHECK-RV64-NEXT: .LBB35_2: -; CHECK-RV64-NEXT: addi a5, a4, -16 -; CHECK-RV64-NEXT: li a7, 0 -; CHECK-RV64-NEXT: bltu a4, a5, .LBB35_4 +; CHECK-RV64-NEXT: mul a5, a4, a2 +; CHECK-RV64-NEXT: addi a6, a3, -32 +; CHECK-RV64-NEXT: sltu a3, a3, a6 +; CHECK-RV64-NEXT: addi a3, a3, -1 +; CHECK-RV64-NEXT: and a6, a3, a6 +; CHECK-RV64-NEXT: li a3, 16 +; CHECK-RV64-NEXT: add a5, a1, a5 +; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4 ; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a7, a5 -; CHECK-RV64-NEXT: .LBB35_4: ; CHECK-RV64-NEXT: li a6, 16 -; CHECK-RV64-NEXT: mv a5, a4 -; CHECK-RV64-NEXT: bltu a4, a6, .LBB35_6 +; CHECK-RV64-NEXT: .LBB35_4: +; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t +; CHECK-RV64-NEXT: addi a5, a4, -16 +; CHECK-RV64-NEXT: sltu a6, a4, a5 +; CHECK-RV64-NEXT: addi a6, a6, -1 +; CHECK-RV64-NEXT: and a5, a6, a5 +; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6 ; CHECK-RV64-NEXT: # %bb.5: -; CHECK-RV64-NEXT: li a5, 16 +; CHECK-RV64-NEXT: li a4, 16 ; CHECK-RV64-NEXT: .LBB35_6: -; CHECK-RV64-NEXT: mul t0, a5, a2 -; CHECK-RV64-NEXT: add t0, a1, t0 -; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV64-NEXT: vsetvli zero, a7, e64, 
m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (t0), a2, v0.t -; CHECK-RV64-NEXT: addi t0, a3, -32 -; CHECK-RV64-NEXT: li a7, 0 -; CHECK-RV64-NEXT: bltu a3, t0, .LBB35_8 -; CHECK-RV64-NEXT: # %bb.7: -; CHECK-RV64-NEXT: mv a7, t0 -; CHECK-RV64-NEXT: .LBB35_8: -; CHECK-RV64-NEXT: bltu a7, a6, .LBB35_10 -; CHECK-RV64-NEXT: # %bb.9: -; CHECK-RV64-NEXT: li a7, 16 -; CHECK-RV64-NEXT: .LBB35_10: ; CHECK-RV64-NEXT: mul a3, a4, a2 ; CHECK-RV64-NEXT: add a3, a1, a3 -; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t +; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t +; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: addi a1, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v24, (a1) +; CHECK-RV64-NEXT: vse64.v v16, (a1) ; CHECK-RV64-NEXT: addi a0, a0, 128 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v16, (a0) +; CHECK-RV64-NEXT: vse64.v v24, (a0) ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0f64.i64(double* %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -503,19 +503,17 @@ ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB27_2: -; CHECK-RV32-NEXT: li a4, 0 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: addi a5, a2, -16 ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV32-NEXT: bltu a2, a5, .LBB27_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a4, a5 -; CHECK-RV32-NEXT: .LBB27_4: -; CHECK-RV32-NEXT: mul a2, a3, a1 -; CHECK-RV32-NEXT: add a0, a0, a2 +; CHECK-RV32-NEXT: mul a3, a3, a1 +; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: addi a3, a2, -16 +; CHECK-RV32-NEXT: sltu a2, a2, a3 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a3 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; @@ -527,19 +525,17 @@ ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB27_2: -; CHECK-RV64-NEXT: li a4, 0 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: addi a5, a2, -16 ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV64-NEXT: bltu a2, a5, .LBB27_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a4, a5 -; CHECK-RV64-NEXT: .LBB27_4: -; CHECK-RV64-NEXT: mul a2, a3, a1 -; CHECK-RV64-NEXT: add a0, a0, a2 +; CHECK-RV64-NEXT: mul a3, a3, a1 +; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: addi a3, a2, -16 +; CHECK-RV64-NEXT: sltu a2, a2, a3 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a3 ; CHECK-RV64-NEXT: vsetivli zero, 2, 
e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret call void @llvm.experimental.vp.strided.store.v32f64.p0f64.i32(<32 x double> %v, double* %ptr, i32 %stride, <32 x i1> %mask, i32 %evl) @@ -555,17 +551,15 @@ ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB28_2: -; CHECK-RV32-NEXT: li a4, 0 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: addi a5, a2, -16 ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1 -; CHECK-RV32-NEXT: bltu a2, a5, .LBB28_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a4, a5 -; CHECK-RV32-NEXT: .LBB28_4: -; CHECK-RV32-NEXT: mul a2, a3, a1 -; CHECK-RV32-NEXT: add a0, a0, a2 -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV32-NEXT: mul a3, a3, a1 +; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: addi a3, a2, -16 +; CHECK-RV32-NEXT: sltu a2, a2, a3 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a3 +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1 ; CHECK-RV32-NEXT: ret ; @@ -577,17 +571,15 @@ ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB28_2: -; CHECK-RV64-NEXT: li a4, 0 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: addi a5, a2, -16 ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1 -; CHECK-RV64-NEXT: bltu a2, a5, .LBB28_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a4, a5 -; CHECK-RV64-NEXT: .LBB28_4: -; CHECK-RV64-NEXT: mul a2, a3, a1 -; CHECK-RV64-NEXT: add a0, a0, a2 -; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV64-NEXT: mul a3, a3, a1 +; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: addi a3, a2, -16 +; CHECK-RV64-NEXT: sltu a2, a2, a3 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a3 +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1 ; CHECK-RV64-NEXT: ret %one = insertelement <32 x i1> poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -309,21 +309,19 @@ ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t @@ -335,21 +333,20 @@ define <32 x double> @vuitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu 
a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.xu.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -413,23 +413,20 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a0) -; CHECK-NEXT: addi a3, a1, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: bltu a1, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: addi a0, a1, -128 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t @@ -443,21 +440,20 @@ define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_v258i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -128 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB33_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vadd.vi v16, v16, -1 -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -128 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1533,24 +1529,22 @@ ; RV32-LABEL: vadd_vx_v32i64: ; RV32: 
# %bb.0: ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB108_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB108_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB108_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB108_4: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t @@ -1559,21 +1553,19 @@ ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a1, 0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a2, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB108_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: bltu a0, a1, .LBB108_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a1, .LBB108_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB108_4: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t @@ -1587,43 +1579,41 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vadd_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: bltu a0, a1, .LBB109_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB109_4: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vi_v32i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a0, a1, .LBB109_2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB109_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a1, 16 -; RV64-NEXT: vadd.vi v16, v16, -1 -; RV64-NEXT: bltu a0, a1, .LBB109_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB109_4: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: .LBB109_2: 
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v16, -1 ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -324,59 +324,45 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a0, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -390,22 +376,21 @@ ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli 
zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfsgnj.vv v16, v16, v24 -; CHECK-NEXT: bltu a2, a0, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v16, v16, v24 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -322,21 +322,19 @@ ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfabs.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfabs.v v8, v8, v0.t @@ -348,21 +346,20 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB27_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfabs.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -657,109 +657,80 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a4, -16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a4, a3, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: addi a2, a4, -16 +; CHECK-NEXT: sltu a3, a4, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, a0, 128 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: 
addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a0, .LBB50_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_4: +; CHECK-NEXT: .LBB50_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -780,50 +751,49 @@ ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a3, a4, -16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a4, a3, .LBB51_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: bltu a4, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v24, v16, v8 -; CHECK-NEXT: bltu a4, a0, .LBB51_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB51_4: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v0, 
v8, v24 +; CHECK-NEXT: addi a0, a4, -16 +; CHECK-NEXT: sltu a1, a4, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v0, v16, v8 -; CHECK-NEXT: vmv.v.v v8, v0 -; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: vmv.v.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -324,59 +324,45 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a0, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -390,22 +376,21 @@ ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmax.vv v16, v16, v24 -; CHECK-NEXT: bltu a2, a0, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -324,59 +324,45 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t -; 
CHECK-NEXT: bltu a2, a0, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a0, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -390,22 +376,21 @@ ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmin.vv v16, v16, v24 -; CHECK-NEXT: bltu a2, a0, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -657,109 +657,80 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a4, -16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr 
a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a4, a3, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: addi a2, a4, -16 +; CHECK-NEXT: sltu a3, a4, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, a0, 128 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a0, .LBB50_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_4: +; CHECK-NEXT: .LBB50_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; 
CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -780,50 +751,49 @@ ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a3, a4, -16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a4, a3, .LBB51_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: bltu a4, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v24, v16, v8 -; CHECK-NEXT: bltu a4, a0, .LBB51_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB51_4: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v0, v8, v24 +; CHECK-NEXT: addi a0, a4, -16 +; CHECK-NEXT: sltu a1, a4, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v0, v16, v8 -; CHECK-NEXT: vmv.v.v v8, v0 -; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: vmv.v.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -322,21 +322,19 @@ ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; 
CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfneg.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfneg.v v8, v8, v0.t @@ -348,21 +346,20 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB27_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfneg.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfneg.v v16, v16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -322,21 +322,19 @@ ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t @@ -348,21 +346,20 @@ define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB27_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsqrt.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; 
CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -288,23 +288,20 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB22_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB22_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t @@ -318,21 +315,20 @@ define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_v258i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vmax.vx v16, v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1100,24 +1096,22 @@ ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; 
RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB74_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vmax.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB74_4: +; RV32-NEXT: .LBB74_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vmax.vv v8, v8, v24, v0.t @@ -1126,22 +1120,20 @@ ; RV64-LABEL: vmax_vx_v32i64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu a0, a1, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB74_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 ; RV64-NEXT: vmax.vx v16, v16, a1, v0.t -; RV64-NEXT: bltu a0, a2, .LBB74_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a2, .LBB74_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB74_4: +; RV64-NEXT: .LBB74_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmax.vx v8, v8, a1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -287,23 +287,20 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB22_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB22_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t @@ -317,21 +314,20 @@ define <256 x i8> @vmaxu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_v258i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vmaxu.vx v16, v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli 
zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1099,24 +1095,22 @@ ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB74_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB74_4: +; RV32-NEXT: .LBB74_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t @@ -1125,22 +1119,20 @@ ; RV64-LABEL: vmaxu_vx_v32i64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu a0, a1, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB74_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 ; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t -; RV64-NEXT: bltu a0, a2, .LBB74_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a2, .LBB74_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB74_4: +; RV64-NEXT: .LBB74_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -288,23 +288,20 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB22_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB22_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; 
CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t @@ -318,21 +315,20 @@ define <256 x i8> @vmin_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_v258i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vmin.vx v16, v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1100,24 +1096,22 @@ ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB74_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vmin.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB74_4: +; RV32-NEXT: .LBB74_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vmin.vv v8, v8, v24, v0.t @@ -1126,22 +1120,20 @@ ; RV64-LABEL: vmin_vx_v32i64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu a0, a1, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB74_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 ; RV64-NEXT: vmin.vx v16, v16, a1, v0.t -; RV64-NEXT: bltu a0, a2, .LBB74_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a2, .LBB74_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB74_4: +; RV64-NEXT: .LBB74_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmin.vx v8, v8, a1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -287,23 +287,20 @@ 
define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a1) -; CHECK-NEXT: addi a4, a2, -128 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a4, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: addi a1, a2, -128 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB22_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a3, .LBB22_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t @@ -317,21 +314,20 @@ define <256 x i8> @vminu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_v258i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vminu.vx v16, v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1099,24 +1095,22 @@ ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB74_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vminu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB74_4: +; RV32-NEXT: .LBB74_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vminu.vv v8, v8, v24, v0.t @@ -1125,22 +1119,20 @@ ; RV64-LABEL: vminu_vx_v32i64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu 
a0, a1, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB74_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 ; RV64-NEXT: vminu.vx v16, v16, a1, v0.t -; RV64-NEXT: bltu a0, a2, .LBB74_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a2, .LBB74_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB74_4: +; RV64-NEXT: .LBB74_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vminu.vx v8, v8, a1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -285,26 +285,24 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i8: ; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a3, .LBB13_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB13_2: +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v10, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB13_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB13_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB13_4: +; RV64-NEXT: .LBB13_2: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1894,24 +1892,22 @@ define <32 x double> @vpgather_v32f64(<32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bltu a0, a2, .LBB86_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB86_2: +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a2, a0, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v1, 2 +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t ; RV32-NEXT: li a1, 16 -; RV32-NEXT: bltu a0, a1, .LBB86_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB86_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB86_4: +; RV32-NEXT: .LBB86_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t @@ -1920,22 +1916,20 @@ ; ; RV64-LABEL: vpgather_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, a0, -16 ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a1, 0 -; RV64-NEXT: bltu a0, a2, .LBB86_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB86_2: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a2, a0, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: 
vslidedown.vi v0, v24, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: li a1, 16 -; RV64-NEXT: bltu a0, a1, .LBB86_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a1, .LBB86_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB86_4: +; RV64-NEXT: .LBB86_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t @@ -1947,57 +1941,54 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB87_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB87_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB87_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB87_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB87_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB87_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: bltu a1, a3, .LBB87_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB87_2: ; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v10, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB87_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB87_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB87_4: +; RV64-NEXT: .LBB87_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2010,57 +2001,54 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: 
vsext.vf4 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB88_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB88_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB88_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB88_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB88_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB88_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: bltu a1, a3, .LBB88_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB88_2: ; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v10, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB88_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB88_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB88_4: +; RV64-NEXT: .LBB88_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2074,57 +2062,54 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB89_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB89_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB89_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB89_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: 
vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB89_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB89_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vzext.vf8 v16, v12 ; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: bltu a1, a3, .LBB89_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v10, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB89_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB89_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB89_4: +; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2138,57 +2123,54 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB90_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB90_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB90_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB90_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB90_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB90_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf4 v24, 
v8 -; RV64-NEXT: bltu a1, a3, .LBB90_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB90_2: ; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v12, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB90_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB90_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB90_4: +; RV64-NEXT: .LBB90_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2201,57 +2183,54 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB91_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB91_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB91_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB91_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB91_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB91_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v0, v16 -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: bltu a1, a3, .LBB91_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB91_2: -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsext.vf4 v0, v8 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v12, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB91_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB91_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB91_4: +; RV64-NEXT: .LBB91_2: ; RV64-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2265,57 +2244,54 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB92_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB92_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB92_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB92_4: +; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB92_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB92_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v0, v16 -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: bltu a1, a3, .LBB92_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB92_2: -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vzext.vf4 v24, v16 +; RV64-NEXT: vzext.vf4 v0, v8 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v12, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB92_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB92_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB92_4: +; RV64-NEXT: .LBB92_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2329,56 +2305,53 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB93_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB93_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; 
RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB93_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB93_4: +; RV32-NEXT: vsll.vi v24, v8, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB93_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB93_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v0, v16 ; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf2 v0, v8 -; RV64-NEXT: bltu a1, a3, .LBB93_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vsll.vi v8, v0, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB93_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB93_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB93_4: +; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t @@ -2391,29 +2364,28 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB94_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB94_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB94_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB94_4: +; RV32-NEXT: vsll.vi v24, v8, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB94_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB94_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; 
RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: @@ -2425,30 +2397,28 @@ ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsext.vf2 v0, v8 -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: bltu a1, a3, .LBB94_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB94_2: -; RV64-NEXT: vsll.vi v8, v0, 3 +; RV64-NEXT: vsext.vf2 v0, v16 +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v16, v0, 3 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB94_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB94_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB94_4: +; RV64-NEXT: .LBB94_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t @@ -2466,29 +2436,28 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB95_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB95_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB95_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB95_4: +; RV32-NEXT: vsll.vi v24, v8, 3 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a3, a1, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: li a2, 16 +; RV32-NEXT: bltu a1, a2, .LBB95_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB95_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: @@ -2500,30 +2469,28 @@ ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v16 -; RV64-NEXT: vzext.vf2 
v0, v8 -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: bltu a1, a3, .LBB95_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB95_2: -; RV64-NEXT: vsll.vi v8, v0, 3 +; RV64-NEXT: vzext.vf2 v0, v16 +; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v16, v0, 3 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vl1r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB95_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB95_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB95_4: +; RV64-NEXT: .LBB95_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t @@ -2541,58 +2508,53 @@ define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a2, 0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, tu, ma +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma ; RV32-NEXT: vslideup.vi v16, v24, 16 ; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsll.vi v24, v16, 3 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB96_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB96_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v1, 2 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB96_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB96_4: +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: bltu a1, a3, .LBB96_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB96_2: ; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a3, a1, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, 
a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB96_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB96_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB96_4: +; RV64-NEXT: .LBB96_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -381,23 +381,21 @@ define <32 x double> @vpload_v32f64(<32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a3, a1, -16 ; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a1, a3, .LBB31_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: addi a2, a1, -16 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: addi a3, a0, 128 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a3), v0.t ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: bltu a1, a2, .LBB31_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB31_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB31_4: +; CHECK-NEXT: .LBB31_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -420,37 +418,33 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: bltu a3, a5, .LBB32_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: addi a4, a3, -16 +; CHECK-NEXT: sltu a5, a3, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a5), v0.t -; CHECK-NEXT: addi a5, a2, -32 -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: bltu a2, a5, .LBB32_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB32_6: +; CHECK-NEXT: addi a4, a2, -32 +; CHECK-NEXT: sltu a2, a2, a4 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a4, a2, a4 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: bltu a4, a2, .LBB32_8 -; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: bltu a4, a2, .LBB32_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB32_8: +; CHECK-NEXT: .LBB32_4: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t -; CHECK-NEXT: bltu a3, a2, .LBB32_10 -; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: bltu a3, a2, .LBB32_6 +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: .LBB32_10: +; CHECK-NEXT: .LBB32_6: ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a1), v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1057,118 
+1057,53 @@ declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32) define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpmerge_vv_v32f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a2, -16 -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bltu a2, a3, .LBB79_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB79_2: -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v1, 2 -; RV32-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; RV32-NEXT: li a0, 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV32-NEXT: bltu a2, a0, .LBB79_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB79_4: -; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vpmerge_vv_v32f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a1) -; RV64-NEXT: addi a3, a2, -16 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v1, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 0 -; RV64-NEXT: bltu a2, a3, .LBB79_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a3 -; RV64-NEXT: .LBB79_2: -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v1, 2 -; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; RV64-NEXT: li a0, 16 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v24, v24, v16, v0 -; RV64-NEXT: bltu a2, a0, .LBB79_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: .LBB79_4: -; RV64-NEXT: vsetvli zero, a2, e64, m8, tu, ma -; RV64-NEXT: vmv1r.v v0, v1 -; RV64-NEXT: 
csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV64-NEXT: vmv8r.v v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vpmerge_vv_v32f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: bltu a2, a0, .LBB79_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB79_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, tu, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl) ret <32 x double> %v } @@ -1176,22 +1111,20 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vf_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a0, a2, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v24, 2 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 -; CHECK-NEXT: bltu a0, a1, .LBB80_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB80_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1695,20 +1695,18 @@ ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; 
RV32-NEXT: vle32.v v24, (a0) -; RV32-NEXT: li a0, 16 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB79_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: bltu a1, a2, .LBB79_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 16 ; RV32-NEXT: .LBB79_2: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t -; RV32-NEXT: bltu a1, a2, .LBB79_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB79_4: +; RV32-NEXT: addi a0, a1, -16 +; RV32-NEXT: sltu a1, a1, a0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -1722,46 +1720,34 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a3, 16 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: bltu a2, a3, .LBB79_2 -; RV64-NEXT: # %bb.1: +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: li a1, 16 +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: bltu a2, a1, .LBB79_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a0, 16 ; RV64-NEXT: .LBB79_2: -; RV64-NEXT: li a3, 0 -; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a0, a2, -16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t -; RV64-NEXT: bltu a2, a0, .LBB79_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: .LBB79_4: +; RV64-NEXT: addi a0, a2, -16 +; RV64-NEXT: sltu a1, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1775,21 +1761,19 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB80_2 +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: bltu a2, a3, .LBB80_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB80_2: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: addi a3, a2, -16 +; RV32-NEXT: vsetvli zero, a1, e64, 
m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: bltu a2, a3, .LBB80_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB80_4: +; RV32-NEXT: addi a1, a2, -16 +; RV32-NEXT: sltu a2, a2, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -1803,52 +1787,50 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vslidedown.vi v0, v24, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v8, v16 -; RV64-NEXT: vsext.vf2 v16, v24 +; RV64-NEXT: vsext.vf2 v16, v0 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf2 v0, v24 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB80_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB80_2: -; RV64-NEXT: li a3, 0 -; RV64-NEXT: vsll.vi v16, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, a2, -16 -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a1, .LBB80_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB80_4: +; RV64-NEXT: addi a1, a2, -16 +; RV64-NEXT: sltu a2, a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1863,21 +1845,19 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB81_2 +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: bltu a2, a3, .LBB81_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB81_2: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: addi a3, a2, -16 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: bltu a2, a3, .LBB81_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB81_4: +; RV32-NEXT: addi 
a1, a2, -16 +; RV32-NEXT: sltu a2, a2, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -1891,53 +1871,51 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v24 +; RV64-NEXT: vsext.vf2 v0, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v24, v24, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v8, v24 +; RV64-NEXT: vsext.vf2 v16, v24 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB81_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB81_2: -; RV64-NEXT: li a3, 0 -; RV64-NEXT: vsll.vi v16, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, a2, -16 -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a1, .LBB81_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB81_4: +; RV64-NEXT: addi a1, a2, -16 +; RV64-NEXT: sltu a2, a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1953,21 +1931,19 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB82_2 +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: bltu a2, a3, .LBB82_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB82_2: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: addi a3, a2, -16 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: bltu a2, a3, .LBB82_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB82_4: +; RV32-NEXT: addi a1, a2, -16 +; RV32-NEXT: sltu a2, a2, a1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi 
v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -1981,53 +1957,51 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v24 +; RV64-NEXT: vzext.vf2 v0, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v24, v24, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v8, v24 +; RV64-NEXT: vzext.vf2 v16, v24 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB82_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB82_2: -; RV64-NEXT: li a3, 0 -; RV64-NEXT: vsll.vi v16, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, a2, -16 -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a1, .LBB82_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB82_4: +; RV64-NEXT: addi a1, a2, -16 +; RV64-NEXT: sltu a2, a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -285,24 +285,22 @@ define void @vpstore_v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: bltu a1, a2, .LBB23_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: addi a3, a1, -16 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t -; CHECK-NEXT: bltu a1, a3, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB23_4: +; CHECK-NEXT: addi a2, a1, -16 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; 
CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.vp.store.v32f64.p0v32f64(<32 x double> %val, <32 x double>* %ptr, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -154,59 +154,37 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a2, a2, a4 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: li a4, 128 -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v24, (a1) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v1, v8 -; CHECK-NEXT: addi a1, a1, 128 -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: bltu a3, a4, .LBB11_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv1r.v v2, v8 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v24, (a1) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: addi a0, a1, 128 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a3, -128 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: bltu a3, a0, .LBB11_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a0 -; CHECK-NEXT: .LBB11_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: sltu a4, a3, a0 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: bltu a3, a2, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -423,59 +401,45 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; 
CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a3, a2, -16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a2, a3, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, a2, -16 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v24, 2 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a0, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a0, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -606,42 +570,41 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a0) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, a2, -32 +; CHECK-NEXT: sltu a3, a2, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a0, 128 
-; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: bltu a2, a3, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a0, a2, -32 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: bltu a2, a0, .LBB35_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a0 -; CHECK-NEXT: .LBB35_4: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -154,23 +154,21 @@ ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: bltu a0, a2, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v8, 16 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vzext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vzext.vf2 v24, v8, v0.t @@ -183,21 +181,19 @@ define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bltu a0, a2, .LBB13_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v8, 16 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vzext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; 
CHECK-NEXT: .LBB13_4: +; CHECK-NEXT: .LBB13_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v24, v8 ; CHECK-NEXT: vmv.v.v v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -737,67 +737,59 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v2, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) 
# Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -809,37 +801,34 @@ ; CHECK-LABEL: vp_floor_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -737,67 +737,59 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v2, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 
-; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -809,37 +801,34 @@ ; CHECK-LABEL: vp_round_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -737,67 +737,59 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v2, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -809,37 +801,34 @@ ; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -737,67 +737,59 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v2, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -809,37 +801,34 @@ ; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; 
CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1086,43 +1086,51 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a1, a3, 1 -; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma -; CHECK-NEXT: slli a5, a3, 3 -; CHECK-NEXT: add a5, a0, a5 -; CHECK-NEXT: vl8re16.v v24, (a5) -; CHECK-NEXT: slli a3, a3, 2 -; CHECK-NEXT: sub a5, a2, a3 -; CHECK-NEXT: vslidedown.vx v0, v0, a1 -; CHECK-NEXT: bltu a2, a5, .LBB85_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB85_2: -; CHECK-NEXT: vl8re16.v v8, (a0) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re16.v v8, (a3) +; CHECK-NEXT: slli a3, a1, 2 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: vl8re16.v v0, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v24, a1 ; CHECK-NEXT: vsetvli zero, a4, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB85_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB85_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB85_4: +; CHECK-NEXT: .LBB85_2: ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vx v16, v2, a1 +; CHECK-NEXT: vslideup.vx v16, v1, a1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -2213,110 +2221,80 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; 
CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v1, v0, a4 ; CHECK-NEXT: srli a1, a3, 3 -; CHECK-NEXT: slli a5, a3, 3 -; CHECK-NEXT: slli a7, a3, 1 -; CHECK-NEXT: add a4, a2, a5 -; CHECK-NEXT: mv t0, a6 -; CHECK-NEXT: bltu a6, a7, .LBB171_2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: slli t1, a3, 3 +; CHECK-NEXT: add a7, a2, t1 +; CHECK-NEXT: vl8re64.v v8, (a7) +; CHECK-NEXT: mul t0, a3, a5 +; CHECK-NEXT: slli a5, a3, 1 +; CHECK-NEXT: slli t2, a3, 4 +; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: bltu a6, a5, .LBB171_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv t0, a7 +; CHECK-NEXT: mv a7, a5 ; CHECK-NEXT: .LBB171_2: -; CHECK-NEXT: li t1, 0 -; CHECK-NEXT: vsetvli t2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vl8re64.v v16, (a4) -; CHECK-NEXT: srli a4, a3, 2 -; CHECK-NEXT: sub t2, t0, a3 -; CHECK-NEXT: vslidedown.vx v0, v24, a1 -; CHECK-NEXT: bltu t0, t2, .LBB171_4 +; CHECK-NEXT: add t0, a2, t0 +; CHECK-NEXT: add t1, a0, t1 +; CHECK-NEXT: add t2, a2, t2 +; CHECK-NEXT: vl8re64.v v16, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: sub a2, a7, a3 +; CHECK-NEXT: sltu t3, a7, a2 +; CHECK-NEXT: addi t3, t3, -1 +; CHECK-NEXT: and a2, t3, a2 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v2, v16, v8, v0.t +; CHECK-NEXT: bltu a7, a3, .LBB171_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv t1, t2 +; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB171_4: -; CHECK-NEXT: li t2, 24 -; CHECK-NEXT: vsetvli t3, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v1, v24, a4 -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: csrr t3, vlenb -; CHECK-NEXT: slli t3, t3, 3 -; CHECK-NEXT: add t3, sp, t3 -; CHECK-NEXT: addi t3, t3, 16 -; CHECK-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill -; CHECK-NEXT: slli t3, a3, 4 -; CHECK-NEXT: vsetvli zero, t1, e64, m8, ta, ma -; CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: slli t1, t1, 4 -; CHECK-NEXT: add t1, sp, t1 -; CHECK-NEXT: addi t1, t1, 16 -; CHECK-NEXT: vl8re8.v v8, (t1) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v2, v8, v16, v0.t -; CHECK-NEXT: bltu t0, a3, .LBB171_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv t0, a3 -; CHECK-NEXT: .LBB171_6: -; CHECK-NEXT: li t1, 0 -; CHECK-NEXT: mul t4, a3, t2 -; CHECK-NEXT: add t2, a2, t3 -; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: li t3, 24 -; CHECK-NEXT: mul t0, t0, t3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vl8re8.v v24, (t0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; 
CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vl8re8.v v8, (t0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t -; CHECK-NEXT: sub t0, a6, a7 -; CHECK-NEXT: add a7, a1, a1 -; CHECK-NEXT: bltu a6, t0, .LBB171_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv t1, t0 -; CHECK-NEXT: .LBB171_8: -; CHECK-NEXT: add a2, a2, t4 -; CHECK-NEXT: vl8re64.v v8, (t2) -; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v1, a1 +; CHECK-NEXT: vl8re64.v v8, (t0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (t1) +; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li t0, 24 -; CHECK-NEXT: mul a6, a6, t0 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 4 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a5 -; CHECK-NEXT: vsetvli zero, a7, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v17, v2, a1 -; CHECK-NEXT: mv a5, t1 -; CHECK-NEXT: bltu t1, a3, .LBB171_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: .LBB171_10: -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v1, a1 -; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: mul a2, a2, t0 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (t2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 @@ -2325,10 +2303,10 @@ ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: li a2, 40 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -2338,33 +2316,59 @@ ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v18, v8, v24, v0.t -; CHECK-NEXT: add a0, a4, a1 +; CHECK-NEXT: vmfeq.vv v18, v24, v8, v0.t +; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: sub a0, t1, a3 -; CHECK-NEXT: vslideup.vx v17, v18, a4 -; CHECK-NEXT: bltu t1, a0, .LBB171_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: mv a6, a0 -; CHECK-NEXT: .LBB171_12: -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: sub a0, a6, a5 +; CHECK-NEXT: sltu a2, a6, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vslideup.vx v18, v2, a1 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: bltu a0, a3, .LBB171_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB171_6: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size 
Folded Reload +; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: add a2, a4, a1 +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v18, v16, a4 +; CHECK-NEXT: sub a2, a0, a3 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: slli a0, a1, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v17, v16, a0 -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vslideup.vx v18, v16, a0 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1161,51 +1161,37 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a4, a0, a1 -; CHECK-NEXT: vl8r.v v24, (a4) -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a4, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v25, (a2) -; CHECK-NEXT: sub a4, a3, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a3, a4, .LBB96_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a4 -; CHECK-NEXT: .LBB96_2: -; CHECK-NEXT: vl8r.v v8, (a0) +; CHECK-NEXT: vl8r.v v8, (a4) +; CHECK-NEXT: vl8r.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vlm.v v0, (a2) +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t -; CHECK-NEXT: bltu 
a3, a1, .LBB96_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a3, a1, .LBB96_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB96_4: +; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload @@ -1215,8 +1201,7 @@ ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1227,27 +1212,26 @@ define @icmp_eq_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB97_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB97_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB97_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB97_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i8 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1258,27 +1242,26 @@ define @icmp_eq_vx_swap_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB98_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB98_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB98_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB98_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmseq.vx v8, v16, 
a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i8 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2399,43 +2382,51 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a1, a3, 2 -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a5, a3, 3 -; CHECK-NEXT: add a5, a0, a5 -; CHECK-NEXT: vl8re32.v v24, (a5) -; CHECK-NEXT: slli a3, a3, 1 -; CHECK-NEXT: sub a5, a2, a3 -; CHECK-NEXT: vslidedown.vx v0, v0, a1 -; CHECK-NEXT: bltu a2, a5, .LBB189_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB189_2: -; CHECK-NEXT: vl8re32.v v8, (a0) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re32.v v8, (a3) +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vl8re32.v v0, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v24, a1 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB189_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB189_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB189_4: +; CHECK-NEXT: .LBB189_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v16, v2, a1 +; CHECK-NEXT: vslideup.vx v16, v1, a1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -2447,23 +2438,21 @@ ; CHECK-LABEL: icmp_eq_vx_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a3, a3, 1 -; CHECK-NEXT: sub a5, a1, a3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 -; CHECK-NEXT: bltu a1, a5, .LBB190_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB190_2: +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub a4, a1, a3 +; CHECK-NEXT: sltu a5, a1, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 ; 
CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a3, .LBB190_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a3, .LBB190_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB190_4: +; CHECK-NEXT: .LBB190_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t @@ -2482,23 +2471,21 @@ ; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a3, a3, 1 -; CHECK-NEXT: sub a5, a1, a3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 -; CHECK-NEXT: bltu a1, a5, .LBB191_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: .LBB191_2: +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub a4, a1, a3 +; CHECK-NEXT: sltu a5, a1, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a3, .LBB191_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a3, .LBB191_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB191_4: +; CHECK-NEXT: .LBB191_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -766,20 +766,18 @@ ; CHECK-RV32-LABEL: strided_load_nxv16f64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: li a2, 0 ; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: sub a6, a3, a4 +; CHECK-RV32-NEXT: sub a2, a3, a4 +; CHECK-RV32-NEXT: sltu a5, a3, a2 +; CHECK-RV32-NEXT: addi a5, a5, -1 +; CHECK-RV32-NEXT: and a2, a5, a2 ; CHECK-RV32-NEXT: srli a5, a4, 3 -; CHECK-RV32-NEXT: bltu a3, a6, .LBB42_2 -; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a2, a6 -; CHECK-RV32-NEXT: .LBB42_2: ; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_4 -; CHECK-RV32-NEXT: # %bb.3: +; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_2 +; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB42_4: +; CHECK-RV32-NEXT: .LBB42_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -792,20 +790,18 @@ ; CHECK-RV64-LABEL: strided_load_nxv16f64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: li a3, 0 ; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: sub a6, a2, a4 +; CHECK-RV64-NEXT: sub a3, a2, a4 +; CHECK-RV64-NEXT: sltu a5, a2, a3 +; CHECK-RV64-NEXT: addi a5, a5, -1 +; CHECK-RV64-NEXT: and a3, a5, a3 ; CHECK-RV64-NEXT: srli a5, a4, 3 -; CHECK-RV64-NEXT: bltu a2, a6, .LBB42_2 -; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a6 -; CHECK-RV64-NEXT: .LBB42_2: ; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB42_4 -; CHECK-RV64-NEXT: # %bb.3: +; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB42_2 +; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 -; 
CHECK-RV64-NEXT: .LBB42_4: +; CHECK-RV64-NEXT: .LBB42_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -822,16 +818,14 @@ ; CHECK-RV32-LABEL: strided_load_nxv16f64_allones_mask: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: sub a5, a3, a4 -; CHECK-RV32-NEXT: li a2, 0 -; CHECK-RV32-NEXT: bltu a3, a5, .LBB43_2 +; CHECK-RV32-NEXT: sub a2, a3, a4 +; CHECK-RV32-NEXT: sltu a5, a3, a2 +; CHECK-RV32-NEXT: addi a5, a5, -1 +; CHECK-RV32-NEXT: and a2, a5, a2 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB43_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a2, a5 -; CHECK-RV32-NEXT: .LBB43_2: -; CHECK-RV32-NEXT: bltu a3, a4, .LBB43_4 -; CHECK-RV32-NEXT: # %bb.3: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB43_4: +; CHECK-RV32-NEXT: .LBB43_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -843,16 +837,14 @@ ; CHECK-RV64-LABEL: strided_load_nxv16f64_allones_mask: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: sub a5, a2, a4 -; CHECK-RV64-NEXT: li a3, 0 -; CHECK-RV64-NEXT: bltu a2, a5, .LBB43_2 +; CHECK-RV64-NEXT: sub a3, a2, a4 +; CHECK-RV64-NEXT: sltu a5, a2, a3 +; CHECK-RV64-NEXT: addi a5, a5, -1 +; CHECK-RV64-NEXT: and a3, a5, a3 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB43_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a5 -; CHECK-RV64-NEXT: .LBB43_2: -; CHECK-RV64-NEXT: bltu a2, a4, .LBB43_4 -; CHECK-RV64-NEXT: # %bb.3: ; CHECK-RV64-NEXT: mv a2, a4 -; CHECK-RV64-NEXT: .LBB43_4: +; CHECK-RV64-NEXT: .LBB43_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -875,48 +867,44 @@ define @strided_load_nxv17f64(double* %ptr, i64 %stride, %mask, i32 zeroext %evl, * %hi_ptr) { ; CHECK-RV32-LABEL: strided_load_nxv17f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a2, vlenb -; CHECK-RV32-NEXT: slli a7, a2, 1 +; CHECK-RV32-NEXT: csrr a5, vlenb +; CHECK-RV32-NEXT: slli a7, a5, 1 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: mv a5, a3 +; CHECK-RV32-NEXT: mv a2, a3 ; CHECK-RV32-NEXT: bltu a3, a7, .LBB44_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a5, a7 +; CHECK-RV32-NEXT: mv a2, a7 ; CHECK-RV32-NEXT: .LBB44_2: -; CHECK-RV32-NEXT: sub a6, a5, a2 -; CHECK-RV32-NEXT: li t0, 0 -; CHECK-RV32-NEXT: bltu a5, a6, .LBB44_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv t0, a6 -; CHECK-RV32-NEXT: .LBB44_4: -; CHECK-RV32-NEXT: srli a6, a2, 3 +; CHECK-RV32-NEXT: sub a6, a2, a5 +; CHECK-RV32-NEXT: sltu t0, a2, a6 +; CHECK-RV32-NEXT: addi t0, t0, -1 +; CHECK-RV32-NEXT: and t0, t0, a6 +; CHECK-RV32-NEXT: srli a6, a5, 3 ; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-RV32-NEXT: mv a6, a5 -; CHECK-RV32-NEXT: bltu a5, a2, .LBB44_6 -; CHECK-RV32-NEXT: # %bb.5: ; CHECK-RV32-NEXT: mv a6, a2 -; CHECK-RV32-NEXT: .LBB44_6: +; CHECK-RV32-NEXT: bltu a2, a5, .LBB44_4 +; CHECK-RV32-NEXT: # %bb.3: +; CHECK-RV32-NEXT: mv a6, a5 +; CHECK-RV32-NEXT: .LBB44_4: ; CHECK-RV32-NEXT: mul t1, a6, a1 ; CHECK-RV32-NEXT: add t1, a0, t1 ; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v16, (t1), a1, v0.t -; CHECK-RV32-NEXT: li t0, 0 -; CHECK-RV32-NEXT: sub t1, a3, a7 -; CHECK-RV32-NEXT: srli a7, a2, 2 -; CHECK-RV32-NEXT: bltu a3, t1, .LBB44_8 -; CHECK-RV32-NEXT: # %bb.7: -; CHECK-RV32-NEXT: mv t0, t1 -; CHECK-RV32-NEXT: .LBB44_8: -; CHECK-RV32-NEXT: 
vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a7 -; CHECK-RV32-NEXT: bltu t0, a2, .LBB44_10 -; CHECK-RV32-NEXT: # %bb.9: -; CHECK-RV32-NEXT: mv t0, a2 -; CHECK-RV32-NEXT: .LBB44_10: -; CHECK-RV32-NEXT: mul a2, a5, a1 +; CHECK-RV32-NEXT: sub a7, a3, a7 +; CHECK-RV32-NEXT: sltu a3, a3, a7 +; CHECK-RV32-NEXT: addi a3, a3, -1 +; CHECK-RV32-NEXT: and a3, a3, a7 +; CHECK-RV32-NEXT: bltu a3, a5, .LBB44_6 +; CHECK-RV32-NEXT: # %bb.5: +; CHECK-RV32-NEXT: mv a3, a5 +; CHECK-RV32-NEXT: .LBB44_6: +; CHECK-RV32-NEXT: srli a5, a5, 2 +; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5 +; CHECK-RV32-NEXT: mul a2, a2, a1 ; CHECK-RV32-NEXT: add a2, a0, a2 -; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a2), a1, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 @@ -926,49 +914,45 @@ ; ; CHECK-RV64-LABEL: strided_load_nxv17f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: slli a7, a4, 1 +; CHECK-RV64-NEXT: csrr a5, vlenb +; CHECK-RV64-NEXT: slli a7, a5, 1 ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: mv a5, a2 +; CHECK-RV64-NEXT: mv a4, a2 ; CHECK-RV64-NEXT: bltu a2, a7, .LBB44_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a5, a7 +; CHECK-RV64-NEXT: mv a4, a7 ; CHECK-RV64-NEXT: .LBB44_2: -; CHECK-RV64-NEXT: sub a6, a5, a4 -; CHECK-RV64-NEXT: li t0, 0 -; CHECK-RV64-NEXT: bltu a5, a6, .LBB44_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv t0, a6 -; CHECK-RV64-NEXT: .LBB44_4: -; CHECK-RV64-NEXT: srli a6, a4, 3 +; CHECK-RV64-NEXT: sub a6, a4, a5 +; CHECK-RV64-NEXT: sltu t0, a4, a6 +; CHECK-RV64-NEXT: addi t0, t0, -1 +; CHECK-RV64-NEXT: and t0, t0, a6 +; CHECK-RV64-NEXT: srli a6, a5, 3 ; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-RV64-NEXT: mv a6, a5 -; CHECK-RV64-NEXT: bltu a5, a4, .LBB44_6 -; CHECK-RV64-NEXT: # %bb.5: ; CHECK-RV64-NEXT: mv a6, a4 -; CHECK-RV64-NEXT: .LBB44_6: +; CHECK-RV64-NEXT: bltu a4, a5, .LBB44_4 +; CHECK-RV64-NEXT: # %bb.3: +; CHECK-RV64-NEXT: mv a6, a5 +; CHECK-RV64-NEXT: .LBB44_4: ; CHECK-RV64-NEXT: mul t1, a6, a1 ; CHECK-RV64-NEXT: add t1, a0, t1 ; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v16, (t1), a1, v0.t -; CHECK-RV64-NEXT: li t0, 0 -; CHECK-RV64-NEXT: sub t1, a2, a7 -; CHECK-RV64-NEXT: srli a7, a4, 2 -; CHECK-RV64-NEXT: bltu a2, t1, .LBB44_8 -; CHECK-RV64-NEXT: # %bb.7: -; CHECK-RV64-NEXT: mv t0, t1 -; CHECK-RV64-NEXT: .LBB44_8: -; CHECK-RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a7 -; CHECK-RV64-NEXT: bltu t0, a4, .LBB44_10 -; CHECK-RV64-NEXT: # %bb.9: -; CHECK-RV64-NEXT: mv t0, a4 -; CHECK-RV64-NEXT: .LBB44_10: -; CHECK-RV64-NEXT: mul a2, a5, a1 -; CHECK-RV64-NEXT: add a2, a0, a2 -; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a2), a1, v0.t +; CHECK-RV64-NEXT: sub a7, a2, a7 +; CHECK-RV64-NEXT: sltu a2, a2, a7 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a7 +; CHECK-RV64-NEXT: bltu a2, a5, .LBB44_6 +; CHECK-RV64-NEXT: # %bb.5: +; CHECK-RV64-NEXT: mv a2, a5 +; CHECK-RV64-NEXT: .LBB44_6: +; CHECK-RV64-NEXT: srli a5, a5, 2 +; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5 +; CHECK-RV64-NEXT: mul a4, a4, a1 +; CHECK-RV64-NEXT: add a4, a0, a4 +; CHECK-RV64-NEXT: vsetvli 
zero, a2, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a4), a1, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -621,51 +621,47 @@ define void @strided_store_nxv16f64( %v, double* %ptr, i32 signext %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_store_nxv16f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a3, vlenb -; CHECK-RV32-NEXT: mv a4, a2 -; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: mv a3, a2 +; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a4, a3 +; CHECK-RV32-NEXT: mv a3, a4 ; CHECK-RV32-NEXT: .LBB34_2: -; CHECK-RV32-NEXT: li a5, 0 -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV32-NEXT: srli a6, a3, 3 -; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: sub a3, a2, a3 -; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a5, a3 -; CHECK-RV32-NEXT: .LBB34_4: -; CHECK-RV32-NEXT: mul a2, a4, a1 -; CHECK-RV32-NEXT: add a0, a0, a2 -; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV32-NEXT: sub a5, a2, a4 +; CHECK-RV32-NEXT: sltu a2, a2, a5 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a5 +; CHECK-RV32-NEXT: srli a4, a4, 3 +; CHECK-RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-RV32-NEXT: mul a3, a3, a1 +; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_store_nxv16f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: csrr a3, vlenb -; CHECK-RV64-NEXT: mv a4, a2 -; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-RV64-NEXT: csrr a4, vlenb +; CHECK-RV64-NEXT: mv a3, a2 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a4, a3 +; CHECK-RV64-NEXT: mv a3, a4 ; CHECK-RV64-NEXT: .LBB34_2: -; CHECK-RV64-NEXT: li a5, 0 -; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV64-NEXT: srli a6, a3, 3 -; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: sub a3, a2, a3 -; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a5, a3 -; CHECK-RV64-NEXT: .LBB34_4: -; CHECK-RV64-NEXT: mul a2, a4, a1 -; CHECK-RV64-NEXT: add a0, a0, a2 -; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV64-NEXT: sub a5, a2, a4 +; CHECK-RV64-NEXT: sltu a2, a2, a5 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a5 +; CHECK-RV64-NEXT: srli a4, a4, 3 +; CHECK-RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-RV64-NEXT: mul a3, a3, a1 +; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret call void 
@llvm.experimental.vp.strided.store.nxv16f64.p0f64.i32( %v, double* %ptr, i32 %stride, %mask, i32 %evl) @@ -681,17 +677,15 @@ ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 ; CHECK-RV32-NEXT: .LBB35_2: -; CHECK-RV32-NEXT: li a5, 0 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: sub a4, a2, a4 ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1 -; CHECK-RV32-NEXT: bltu a2, a4, .LBB35_4 -; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a5, a4 -; CHECK-RV32-NEXT: .LBB35_4: -; CHECK-RV32-NEXT: mul a2, a3, a1 -; CHECK-RV32-NEXT: add a0, a0, a2 -; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV32-NEXT: sub a4, a2, a4 +; CHECK-RV32-NEXT: sltu a2, a2, a4 +; CHECK-RV32-NEXT: addi a2, a2, -1 +; CHECK-RV32-NEXT: and a2, a2, a4 +; CHECK-RV32-NEXT: mul a3, a3, a1 +; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1 ; CHECK-RV32-NEXT: ret ; @@ -703,17 +697,15 @@ ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a3, a4 ; CHECK-RV64-NEXT: .LBB35_2: -; CHECK-RV64-NEXT: li a5, 0 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: sub a4, a2, a4 ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB35_4 -; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a5, a4 -; CHECK-RV64-NEXT: .LBB35_4: -; CHECK-RV64-NEXT: mul a2, a3, a1 -; CHECK-RV64-NEXT: add a0, a0, a2 -; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV64-NEXT: sub a4, a2, a4 +; CHECK-RV64-NEXT: sltu a2, a2, a4 +; CHECK-RV64-NEXT: addi a2, a2, -1 +; CHECK-RV64-NEXT: and a2, a2, a4 +; CHECK-RV64-NEXT: mul a3, a3, a1 +; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1 ; CHECK-RV64-NEXT: ret %one = insertelement poison, i1 true, i32 0 @@ -728,55 +720,51 @@ define void @strided_store_nxv17f64( %v, double* %ptr, i32 signext %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_store_nxv17f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: addi sp, sp, -16 -; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 -; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: slli a4, a4, 3 -; CHECK-RV32-NEXT: sub sp, sp, a4 ; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: slli a7, a4, 1 +; CHECK-RV32-NEXT: slli a6, a4, 1 ; CHECK-RV32-NEXT: vmv1r.v v24, v0 -; CHECK-RV32-NEXT: addi a5, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: mv a6, a3 -; CHECK-RV32-NEXT: bltu a3, a7, .LBB36_2 +; CHECK-RV32-NEXT: mv a5, a3 +; CHECK-RV32-NEXT: bltu a3, a6, .LBB36_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a6, a7 -; CHECK-RV32-NEXT: .LBB36_2: ; CHECK-RV32-NEXT: mv a5, a6 -; CHECK-RV32-NEXT: bltu a6, a4, .LBB36_4 +; CHECK-RV32-NEXT: .LBB36_2: +; CHECK-RV32-NEXT: mv a7, a5 +; CHECK-RV32-NEXT: bltu a5, a4, .LBB36_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a5, a4 +; CHECK-RV32-NEXT: mv a7, a4 ; CHECK-RV32-NEXT: .LBB36_4: -; CHECK-RV32-NEXT: li t0, 0 -; CHECK-RV32-NEXT: vl8re64.v v16, (a0) -; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr t0, vlenb +; CHECK-RV32-NEXT: slli t0, t0, 3 +; CHECK-RV32-NEXT: sub sp, sp, t0 +; CHECK-RV32-NEXT: vl8re64.v v0, (a0) +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v24 ; CHECK-RV32-NEXT: 
vsse64.v v8, (a1), a2, v0.t -; CHECK-RV32-NEXT: sub a7, a3, a7 -; CHECK-RV32-NEXT: srli a0, a4, 2 -; CHECK-RV32-NEXT: bltu a3, a7, .LBB36_6 +; CHECK-RV32-NEXT: sub a0, a5, a4 +; CHECK-RV32-NEXT: sltu t0, a5, a0 +; CHECK-RV32-NEXT: addi t0, t0, -1 +; CHECK-RV32-NEXT: and a0, t0, a0 +; CHECK-RV32-NEXT: srli t0, a4, 3 +; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v24, t0 +; CHECK-RV32-NEXT: mul a7, a7, a2 +; CHECK-RV32-NEXT: add a7, a1, a7 +; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-RV32-NEXT: sub a0, a3, a6 +; CHECK-RV32-NEXT: sltu a3, a3, a0 +; CHECK-RV32-NEXT: addi a3, a3, -1 +; CHECK-RV32-NEXT: and a0, a3, a0 +; CHECK-RV32-NEXT: vsse64.v v16, (a7), a2, v0.t +; CHECK-RV32-NEXT: bltu a0, a4, .LBB36_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: mv t0, a7 +; CHECK-RV32-NEXT: mv a0, a4 ; CHECK-RV32-NEXT: .LBB36_6: -; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a0 -; CHECK-RV32-NEXT: bltu t0, a4, .LBB36_8 -; CHECK-RV32-NEXT: # %bb.7: -; CHECK-RV32-NEXT: mv t0, a4 -; CHECK-RV32-NEXT: .LBB36_8: -; CHECK-RV32-NEXT: li a0, 0 -; CHECK-RV32-NEXT: mul a3, a6, a2 -; CHECK-RV32-NEXT: add a7, a1, a3 -; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma -; CHECK-RV32-NEXT: sub a3, a6, a4 -; CHECK-RV32-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-RV32-NEXT: bltu a6, a3, .LBB36_10 -; CHECK-RV32-NEXT: # %bb.9: -; CHECK-RV32-NEXT: mv a0, a3 -; CHECK-RV32-NEXT: .LBB36_10: -; CHECK-RV32-NEXT: srli a3, a4, 3 -; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: srli a3, a4, 2 +; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a3 ; CHECK-RV32-NEXT: mul a3, a5, a2 ; CHECK-RV32-NEXT: add a1, a1, a3 @@ -792,55 +780,51 @@ ; ; CHECK-RV64-LABEL: strided_store_nxv17f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: addi sp, sp, -16 -; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 -; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: slli a4, a4, 3 -; CHECK-RV64-NEXT: sub sp, sp, a4 ; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: slli a7, a4, 1 +; CHECK-RV64-NEXT: slli a6, a4, 1 ; CHECK-RV64-NEXT: vmv1r.v v24, v0 -; CHECK-RV64-NEXT: addi a5, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: mv a6, a3 -; CHECK-RV64-NEXT: bltu a3, a7, .LBB36_2 +; CHECK-RV64-NEXT: mv a5, a3 +; CHECK-RV64-NEXT: bltu a3, a6, .LBB36_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a6, a7 -; CHECK-RV64-NEXT: .LBB36_2: ; CHECK-RV64-NEXT: mv a5, a6 -; CHECK-RV64-NEXT: bltu a6, a4, .LBB36_4 +; CHECK-RV64-NEXT: .LBB36_2: +; CHECK-RV64-NEXT: mv a7, a5 +; CHECK-RV64-NEXT: bltu a5, a4, .LBB36_4 ; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a5, a4 +; CHECK-RV64-NEXT: mv a7, a4 ; CHECK-RV64-NEXT: .LBB36_4: -; CHECK-RV64-NEXT: li t0, 0 -; CHECK-RV64-NEXT: vl8re64.v v16, (a0) -; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-RV64-NEXT: addi sp, sp, -16 +; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV64-NEXT: csrr t0, vlenb +; CHECK-RV64-NEXT: slli t0, t0, 3 +; CHECK-RV64-NEXT: sub sp, sp, t0 +; CHECK-RV64-NEXT: vl8re64.v v0, (a0) +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v24 ; CHECK-RV64-NEXT: vsse64.v v8, (a1), a2, v0.t -; CHECK-RV64-NEXT: sub a7, a3, a7 -; CHECK-RV64-NEXT: srli a0, a4, 2 -; CHECK-RV64-NEXT: bltu a3, a7, .LBB36_6 +; CHECK-RV64-NEXT: sub a0, a5, a4 +; 
CHECK-RV64-NEXT: sltu t0, a5, a0 +; CHECK-RV64-NEXT: addi t0, t0, -1 +; CHECK-RV64-NEXT: and a0, t0, a0 +; CHECK-RV64-NEXT: srli t0, a4, 3 +; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v24, t0 +; CHECK-RV64-NEXT: mul a7, a7, a2 +; CHECK-RV64-NEXT: add a7, a1, a7 +; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-RV64-NEXT: sub a0, a3, a6 +; CHECK-RV64-NEXT: sltu a3, a3, a0 +; CHECK-RV64-NEXT: addi a3, a3, -1 +; CHECK-RV64-NEXT: and a0, a3, a0 +; CHECK-RV64-NEXT: vsse64.v v16, (a7), a2, v0.t +; CHECK-RV64-NEXT: bltu a0, a4, .LBB36_6 ; CHECK-RV64-NEXT: # %bb.5: -; CHECK-RV64-NEXT: mv t0, a7 +; CHECK-RV64-NEXT: mv a0, a4 ; CHECK-RV64-NEXT: .LBB36_6: -; CHECK-RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a0 -; CHECK-RV64-NEXT: bltu t0, a4, .LBB36_8 -; CHECK-RV64-NEXT: # %bb.7: -; CHECK-RV64-NEXT: mv t0, a4 -; CHECK-RV64-NEXT: .LBB36_8: -; CHECK-RV64-NEXT: li a0, 0 -; CHECK-RV64-NEXT: mul a3, a6, a2 -; CHECK-RV64-NEXT: add a7, a1, a3 -; CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma -; CHECK-RV64-NEXT: sub a3, a6, a4 -; CHECK-RV64-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-RV64-NEXT: bltu a6, a3, .LBB36_10 -; CHECK-RV64-NEXT: # %bb.9: -; CHECK-RV64-NEXT: mv a0, a3 -; CHECK-RV64-NEXT: .LBB36_10: -; CHECK-RV64-NEXT: srli a3, a4, 3 -; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: srli a3, a4, 2 +; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a3 ; CHECK-RV64-NEXT: mul a3, a5, a2 ; CHECK-RV64-NEXT: add a1, a1, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -645,26 +645,24 @@ define @vadd_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: bltu a1, a2, .LBB50_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub a2, a1, a0 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a1, a0, .LBB50_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a1, a2 -; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: bltu a1, a0, .LBB50_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a0 -; CHECK-NEXT: .LBB50_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 -1, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -677,21 +675,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1 ; 
CHECK-NEXT: bltu a0, a1, .LBB51_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 -; CHECK-NEXT: bltu a0, a1, .LBB51_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB51_4: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 -1, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1545,23 +1540,21 @@ ; CHECK-LABEL: vadd_vi_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB118_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB118_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB118_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB118_4: +; CHECK-NEXT: .LBB118_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t @@ -1577,21 +1570,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: bltu a0, a1, .LBB119_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB119_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 -; CHECK-NEXT: bltu a0, a1, .LBB119_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB119_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 -1, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1612,23 +1602,21 @@ ; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a4, a0, 2 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: slli a1, a0, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB120_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB120_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB120_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB120_2 +; CHECK-NEXT: # 
%bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB120_4: +; CHECK-NEXT: .LBB120_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -395,22 +395,20 @@ ; CHECK-LABEL: vfabs_vv_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfabs.v v8, v8, v0.t @@ -423,21 +421,18 @@ ; CHECK-LABEL: vfabs_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1200,13 +1200,12 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1215,42 +1214,35 @@ ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a5, a1, 3 -; CHECK-NEXT: add a6, a2, a5 -; CHECK-NEXT: vl8re64.v v8, (a6) -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: 
add a5, a0, a5 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: li a6, 24 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a6, a1, 3 ; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: srli a6, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re64.v v8, (a3) +; CHECK-NEXT: vl8re64.v v16, (a2) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-NEXT: bltu a4, a5, .LBB92_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: .LBB92_2: -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 @@ -1258,27 +1250,21 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a1, .LBB92_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a4, a1, .LBB92_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB92_4: +; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb @@ -1287,21 +1273,21 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # 
Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1320,53 +1306,50 @@ ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v24, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a4, a5, .LBB93_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: .LBB93_2: +; CHECK-NEXT: vl8re64.v v16, (a3) +; CHECK-NEXT: sub a3, a4, a1 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: and a0, a5, a3 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v24, v16, v8 -; CHECK-NEXT: bltu a4, a1, .LBB93_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vfmadd.vv v16, v8, v24 +; CHECK-NEXT: bltu a4, a1, .LBB93_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB93_4: +; CHECK-NEXT: .LBB93_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v0, v16, v8 +; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 -; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1200,13 +1200,12 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1215,42 +1214,35 @@ ; CHECK-NEXT: add a1, sp, a1 ; 
CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a5, a1, 3 -; CHECK-NEXT: add a6, a2, a5 -; CHECK-NEXT: vl8re64.v v8, (a6) -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: li a6, 24 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a6, a1, 3 ; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: srli a6, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re64.v v8, (a3) +; CHECK-NEXT: vl8re64.v v16, (a2) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-NEXT: bltu a4, a5, .LBB92_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: .LBB92_2: -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 @@ -1258,27 +1250,21 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a1, .LBB92_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a4, a1, .LBB92_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB92_4: +; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb @@ -1287,21 +1273,21 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v 
v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1320,53 +1306,50 @@ ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v24, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bltu a4, a5, .LBB93_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: .LBB93_2: +; CHECK-NEXT: vl8re64.v v16, (a3) +; CHECK-NEXT: sub a3, a4, a1 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: and a0, a5, a3 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v24, v16, v8 -; CHECK-NEXT: bltu a4, a1, .LBB93_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: vfmadd.vv v16, v8, v24 +; CHECK-NEXT: bltu a4, a1, .LBB93_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB93_4: +; CHECK-NEXT: .LBB93_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v0, v16, v8 +; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 -; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -395,22 +395,20 @@ ; CHECK-LABEL: vfneg_vv_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfneg.v v8, v8, v0.t @@ -423,21 +421,18 @@ ; CHECK-LABEL: vfneg_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfneg.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfneg.v v16, v16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll @@ -95,23 +95,21 @@ ; CHECK-LABEL: vfpext_nxv32f16_nxv32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB7_4: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -322,23 +322,21 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; 
CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 @@ -359,23 +357,21 @@ ; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t @@ -389,21 +385,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i32.nxv32f32( %va, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -322,23 +322,21 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, 
a3, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 @@ -359,23 +357,21 @@ ; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t @@ -389,21 +385,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i32.nxv32f32( %va, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fptrunc.nxv2f16.nxv2f32(, , i32) @@ -99,30 +99,29 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; 
CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB7_4: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -140,90 +139,86 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: mv a5, a2 -; CHECK-NEXT: bltu a2, a4, .LBB8_2 +; CHECK-NEXT: srli a3, a1, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re64.v v8, (a3) +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, a5 +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t +; CHECK-NEXT: bltu a4, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a7, a5, a1 -; CHECK-NEXT: vslidedown.vx v0, v24, a3 -; CHECK-NEXT: bltu a5, a7, .LBB8_4 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; 
CHECK-NEXT: vslidedown.vx v26, v1, a5 +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: -; CHECK-NEXT: srli a7, a1, 2 -; CHECK-NEXT: slli t0, a1, 3 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v1, v24, a7 -; CHECK-NEXT: add a7, a0, t0 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: sub a4, a2, a4 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv a6, a4 -; CHECK-NEXT: .LBB8_8: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vl8re64.v v16, (a7) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: sub a4, a6, a1 -; CHECK-NEXT: vslidedown.vx v0, v1, a3 -; CHECK-NEXT: bltu a6, a4, .LBB8_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: mv a2, a4 -; CHECK-NEXT: .LBB8_10: -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT: sub a0, a2, a1 +; CHECK-NEXT: sltu a3, a2, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a6, a1, .LBB8_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: mv a6, a1 -; CHECK-NEXT: .LBB8_12: -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v12, v24, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB8_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB8_6: +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -395,22 +395,20 @@ ; CHECK-LABEL: vfsqrt_vv_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: 
csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t @@ -423,21 +421,18 @@ ; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB33_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfsqrt.v v16, v16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -443,26 +443,24 @@ define @vmax_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB34_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB34_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -475,21 +473,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: 
mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB35_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB35_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vmax.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1057,23 +1052,21 @@ ; CHECK-LABEL: vmax_vx_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB80_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t @@ -1089,21 +1082,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB81_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB81_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB81_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB81_4: -; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmax.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1124,23 +1114,21 @@ ; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; 
CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB82_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -442,26 +442,24 @@ define @vmaxu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB34_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB34_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -474,21 +472,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB35_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB35_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vmaxu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1056,23 +1051,21 @@ ; CHECK-LABEL: vmaxu_vx_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu 
a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB80_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t @@ -1088,21 +1081,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB81_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB81_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB81_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB81_4: -; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmaxu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1123,23 +1113,21 @@ ; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB82_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -443,26 +443,24 @@ define @vmin_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, 
zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB34_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB34_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -475,21 +473,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB35_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB35_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vmin.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1057,23 +1052,21 @@ ; CHECK-LABEL: vmin_vx_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB80_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t @@ -1089,21 +1082,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB81_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB81_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB81_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB81_4: -; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmin.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, 
poison, zeroinitializer @@ -1124,23 +1114,21 @@ ; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB82_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -442,26 +442,24 @@ define @vminu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t -; CHECK-NEXT: bltu a2, a1, .LBB34_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB34_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -474,21 +472,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB35_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB35_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma 
-; CHECK-NEXT: vminu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1056,23 +1051,21 @@ ; CHECK-LABEL: vminu_vx_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB80_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t @@ -1088,21 +1081,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a0 ; CHECK-NEXT: bltu a1, a2, .LBB81_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB81_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: bltu a1, a2, .LBB81_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB81_4: -; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vminu.vx v16, v16, a0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1123,23 +1113,21 @@ ; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB82_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -257,25 +257,23 @@ ; RV32-LABEL: vpgather_baseidx_nxv32i8: ; 
RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a5, a2, 2 -; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a2, a3, 1 ; RV32-NEXT: sub a4, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB12_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB12_2: -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: sltu a5, a1, a4 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 +; RV32-NEXT: srli a3, a3, 2 +; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a3 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v24, v10 -; RV32-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV32-NEXT: vsetvli zero, a4, e8, m2, ta, ma ; RV32-NEXT: vluxei32.v v18, (a0), v24, v0.t -; RV32-NEXT: bltu a1, a2, .LBB12_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a1, a2, .LBB12_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB12_4: +; RV32-NEXT: .LBB12_2: ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v24, v8 ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -286,66 +284,61 @@ ; ; RV64-LABEL: vpgather_baseidx_nxv32i8: ; RV64: # %bb.0: -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a5, a3, 1 -; RV64-NEXT: sub a6, a1, a5 -; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a4, 0 -; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a6, .LBB12_2 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a4, a2, 1 +; RV64-NEXT: sub a3, a1, a4 +; RV64-NEXT: sltu a5, a1, a3 +; RV64-NEXT: addi a5, a5, -1 +; RV64-NEXT: and a3, a5, a3 +; RV64-NEXT: vmv1r.v v17, v0 +; RV64-NEXT: mv a5, a3 +; RV64-NEXT: bltu a3, a2, .LBB12_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a6 +; RV64-NEXT: mv a5, a2 ; RV64-NEXT: .LBB12_2: -; RV64-NEXT: sub a6, a2, a3 -; RV64-NEXT: mv a7, a4 -; RV64-NEXT: bltu a2, a6, .LBB12_4 +; RV64-NEXT: srli a6, a2, 2 +; RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v16, v17, a6 +; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v10 +; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t +; RV64-NEXT: bltu a1, a4, .LBB12_4 ; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a7, a6 +; RV64-NEXT: mv a1, a4 ; RV64-NEXT: .LBB12_4: -; RV64-NEXT: srli a6, a3, 2 -; RV64-NEXT: vsetvli t0, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v13, v12, a6 -; RV64-NEXT: srli a6, a3, 3 -; RV64-NEXT: vsetvli t0, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v13, a6 -; RV64-NEXT: vsetvli t0, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v11 -; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, ma -; RV64-NEXT: vluxei64.v v19, (a0), v24, v0.t -; RV64-NEXT: bltu a1, a5, .LBB12_6 +; RV64-NEXT: sub a4, a1, a2 +; RV64-NEXT: sltu a5, a1, a4 +; RV64-NEXT: addi a5, a5, -1 +; RV64-NEXT: and a5, a5, a4 +; RV64-NEXT: srli a4, a2, 3 +; RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v17, a4 +; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v9 +; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma +; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t +; RV64-NEXT: bltu a1, a2, .LBB12_6 ; RV64-NEXT: # %bb.5: -; RV64-NEXT: mv a1, a5 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB12_6: -; RV64-NEXT: sub a5, a1, a3 -; RV64-NEXT: bltu a1, a5, .LBB12_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: mv a4, a5 -; RV64-NEXT: .LBB12_8: -; RV64-NEXT: vsetvli a5, zero, e8, 
mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v12, a6 ; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v9 -; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; RV64-NEXT: vluxei64.v v17, (a0), v24, v0.t -; RV64-NEXT: bltu a1, a3, .LBB12_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: mv a1, a3 -; RV64-NEXT: .LBB12_10: -; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a3, .LBB12_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB12_12: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v10 -; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v13 -; RV64-NEXT: vluxei64.v v18, (a0), v24, v0.t -; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: vmv1r.v v0, v17 +; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: sub a1, a3, a2 +; RV64-NEXT: sltu a2, a3, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v16, a4 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v11 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, %idxs %v = call @llvm.vp.gather.nxv32i8.nxv32p0i8( %ptrs, %m, i32 %evl) @@ -2295,22 +2288,20 @@ ; RV32-LABEL: vpgather_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v24, v0 -; RV32-NEXT: li a2, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a4, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a3, a0, a1 -; RV32-NEXT: vslidedown.vx v0, v0, a4 -; RV32-NEXT: bltu a0, a3, .LBB102_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: sub a2, a0, a1 +; RV32-NEXT: sltu a3, a0, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: srli a3, a1, 3 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t -; RV32-NEXT: bltu a0, a1, .LBB102_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a0, a1, .LBB102_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB102_4: +; RV32-NEXT: .LBB102_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t @@ -2320,22 +2311,20 @@ ; RV64-LABEL: vpgather_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: li a2, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a4, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a3, a0, a1 -; RV64-NEXT: vslidedown.vx v0, v0, a4 -; RV64-NEXT: bltu a0, a3, .LBB102_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB102_2: +; RV64-NEXT: sub a2, a0, a1 +; RV64-NEXT: sltu a3, a0, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: srli a3, a1, 3 +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t -; RV64-NEXT: bltu a0, a1, .LBB102_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a0, a1, .LBB102_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB102_4: +; RV64-NEXT: .LBB102_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t @@ -2348,25 +2337,23 @@ ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a5, a2, 3 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a4, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB103_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB103_2: +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: srli a4, a2, 3 +; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB103_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a1, a2, .LBB103_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB103_4: +; RV32-NEXT: .LBB103_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2375,28 +2362,25 @@ ; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a3, 0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: srli a5, a2, 3 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a4, a1, a2 -; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB103_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB103_2: -; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: srli a4, a2, 3 +; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB103_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB103_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB103_4: +; RV64-NEXT: .LBB103_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2410,25 +2394,23 @@ ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a5, a2, 3 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a4, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB104_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: srli a4, a2, 3 +; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB104_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: 
# %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB104_4: +; RV32-NEXT: .LBB104_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2437,28 +2419,25 @@ ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a3, 0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: srli a5, a2, 3 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a4, a1, a2 -; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB104_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB104_2: -; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: srli a4, a2, 3 +; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB104_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB104_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_4: +; RV64-NEXT: .LBB104_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2473,25 +2452,23 @@ ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a5, a2, 3 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a4, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB105_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB105_2: +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: srli a4, a2, 3 +; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB105_4 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB105_4: +; RV32-NEXT: .LBB105_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2500,28 +2477,25 @@ ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: li a3, 0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vzext.vf4 v16, v10 ; RV64-NEXT: vzext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: srli a5, a2, 3 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a4, a1, a2 -; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB105_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB105_2: -; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: srli a4, a2, 3 +; 
RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB105_4 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: bltu a1, a2, .LBB105_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB105_4: +; RV64-NEXT: .LBB105_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -454,24 +454,22 @@ ; CHECK-LABEL: vpload_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a5, a2, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB37_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB37_2: +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a4, a2, 3 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a4 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: bltu a1, a2, .LBB37_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB37_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB37_4: +; CHECK-NEXT: .LBB37_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -502,40 +500,36 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a5 ; CHECK-NEXT: .LBB38_2: -; CHECK-NEXT: sub a7, a4, a3 -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: bltu a4, a7, .LBB38_4 +; CHECK-NEXT: sub a6, a4, a3 +; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a6, a7, a6 +; CHECK-NEXT: srli a7, a3, 3 +; CHECK-NEXT: vsetvli t0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a7 +; CHECK-NEXT: slli a7, a3, 3 +; CHECK-NEXT: add a7, a0, a7 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a7), v0.t +; CHECK-NEXT: sub a5, a2, a5 +; CHECK-NEXT: sltu a2, a2, a5 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a5 +; CHECK-NEXT: bltu a2, a3, .LBB38_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB38_4: -; CHECK-NEXT: li a7, 0 -; CHECK-NEXT: srli t0, a3, 3 -; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v8, t0 -; CHECK-NEXT: slli t0, a3, 3 -; CHECK-NEXT: add t0, a0, t0 -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (t0), v0.t -; CHECK-NEXT: srli a6, a3, 2 -; CHECK-NEXT: sub t0, a2, a5 +; CHECK-NEXT: srli a5, a3, 2 +; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a5 ; CHECK-NEXT: slli a5, a3, 4 -; CHECK-NEXT: bltu a2, t0, .LBB38_6 +; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: bltu a4, a3, .LBB38_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a7, t0 -; CHECK-NEXT: .LBB38_6: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-NEXT: add a2, a0, a5 -; CHECK-NEXT: bltu a7, a3, .LBB38_8 -; CHECK-NEXT: # %bb.7: -; 
CHECK-NEXT: mv a7, a3 -; CHECK-NEXT: .LBB38_8: -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a2), v0.t -; CHECK-NEXT: bltu a4, a3, .LBB38_10 -; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB38_10: +; CHECK-NEXT: .LBB38_6: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a0), v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -368,110 +368,44 @@ declare @llvm.vp.merge.nxv128i8(, , , i32) define @vpmerge_vv_nxv128i8( %va, %vb, %m, i32 zeroext %evl) { -; RV32-LABEL: vpmerge_vv_nxv128i8: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a4, a0, a1 -; RV32-NEXT: vl8r.v v24, (a4) -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a4, zero, e8, m8, ta, ma -; RV32-NEXT: vlm.v v2, (a2) -; RV32-NEXT: sub a4, a3, a1 -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a3, a4, .LBB28_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: .LBB28_2: -; RV32-NEXT: vl8r.v v8, (a0) -; RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; RV32-NEXT: vmv1r.v v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV32-NEXT: bltu a3, a1, .LBB28_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB28_4: -; RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vpmerge_vv_nxv128i8: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a4, a0, a1 -; RV64-NEXT: vl8r.v v24, (a4) -; RV64-NEXT: vsetvli a4, zero, e8, m8, ta, ma -; RV64-NEXT: vlm.v v2, (a2) -; RV64-NEXT: sub a4, a3, a1 -; RV64-NEXT: vmv1r.v v1, v0 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a3, a4, .LBB28_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a4 -; RV64-NEXT: .LBB28_2: -; RV64-NEXT: vl8r.v v8, (a0) -; RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; RV64-NEXT: vmv1r.v v0, v2 -; 
RV64-NEXT: vmerge.vvm v24, v24, v16, v0 -; RV64-NEXT: bltu a3, a1, .LBB28_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB28_4: -; RV64-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; RV64-NEXT: vmv1r.v v0, v1 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV64-NEXT: vmv8r.v v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vpmerge_vv_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a4, a0, a1 +; CHECK-NEXT: vl8r.v v16, (a4) +; CHECK-NEXT: vl8r.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vlm.v v0, (a2) +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: bltu a3, a1, .LBB28_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: .LBB28_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call @llvm.vp.merge.nxv128i8( %m, %va, %vb, i32 %evl) ret %v } @@ -479,26 +413,24 @@ define @vpmerge_vx_nxv128i8(i8 %a, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB29_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0 +; CHECK-NEXT: bltu a2, a1, .LBB29_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, ma -; CHECK-NEXT: sub a1, a2, a3 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: bltu a2, a1, .LBB29_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB29_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer @@ -509,26 +441,24 @@ define @vpmerge_vi_nxv128i8( %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: bltu a1, 
a2, .LBB30_2 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub a2, a1, a0 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vim v16, v16, 2, v0 +; CHECK-NEXT: bltu a1, a0, .LBB30_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: .LBB30_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; CHECK-NEXT: sub a0, a1, a2 -; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 -; CHECK-NEXT: bltu a1, a0, .LBB30_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a0 -; CHECK-NEXT: .LBB30_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmerge.vim v16, v16, 2, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 2, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2039,18 +2039,16 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: .LBB95_2: -; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t -; RV32-NEXT: srli a2, a0, 3 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: bltu a1, a0, .LBB95_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: .LBB95_4: -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: sub a2, a1, a0 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret ; @@ -2061,33 +2059,31 @@ ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vl8re64.v v16, (a0) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a3, a1, 3 -; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: mv a3, a2 +; RV64-NEXT: add a3, a0, a3 +; RV64-NEXT: vl8re64.v v24, (a3) +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vl8re64.v v24, (a0) +; RV64-NEXT: mv a0, a2 ; RV64-NEXT: bltu a2, a1, .LBB95_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a3, a1 +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB95_2: -; RV64-NEXT: li a4, 0 -; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t -; RV64-NEXT: srli a3, a1, 3 -; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 -; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a0, .LBB95_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: .LBB95_4: -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: sltu a2, a2, a0 +; RV64-NEXT: addi a2, a2, -1 +; 
RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t +; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 @@ -2110,18 +2106,16 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB96_2: -; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: srli a3, a1, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a1, a2, a1 -; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB96_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB96_4: -; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV32-NEXT: sub a3, a2, a1 +; RV32-NEXT: sltu a2, a2, a3 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -2130,39 +2124,47 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vl4re16.v v4, (a1) -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl4re16.v v24, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v4 +; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsext.vf4 v16, v24 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: vsext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB96_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB96_2: -; RV64-NEXT: li a4, 0 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: srli a3, a1, 3 -; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a1, a2, a1 -; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a1, .LBB96_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB96_4: -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: sub a3, a2, a1 +; RV64-NEXT: sltu a2, a2, a3 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: 
add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2184,18 +2186,16 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB97_2: -; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: srli a3, a1, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a1, a2, a1 -; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB97_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB97_4: -; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV32-NEXT: sub a3, a2, a1 +; RV32-NEXT: sltu a2, a2, a3 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -2204,39 +2204,47 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vl4re16.v v4, (a1) -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl4re16.v v24, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v4 +; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsext.vf4 v16, v24 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: vsext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB97_2: -; RV64-NEXT: li a4, 0 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: srli a3, a1, 3 -; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a1, a2, a1 -; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a1, .LBB97_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB97_4: -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: sub a3, a2, a1 +; RV64-NEXT: sltu a2, a2, a3 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2259,18 +2267,16 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB98_2: -; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: srli a3, a1, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: sub a1, a2, a1 -; RV32-NEXT: 
vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB98_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB98_4: -; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV32-NEXT: sub a3, a2, a1 +; RV32-NEXT: sltu a2, a2, a3 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -2279,39 +2285,47 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vl4re16.v v4, (a1) -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl4re16.v v24, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v4 +; RV64-NEXT: vzext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vzext.vf4 v16, v24 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: vzext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB98_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB98_2: -; RV64-NEXT: li a4, 0 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: srli a3, a1, 3 -; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV64-NEXT: sub a1, a2, a1 -; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a1, .LBB98_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB98_4: -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: sub a3, a2, a1 +; RV64-NEXT: sltu a2, a2, a3 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -375,20 +375,18 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: .LBB30_2: -; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t -; CHECK-NEXT: srli a5, a2, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a3, .LBB30_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB30_4: -; CHECK-NEXT: slli a1, a2, 3 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: 
vsetvli zero, a4, e64, m8, ta, ma +; CHECK-NEXT: sltu a1, a1, a3 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a3 +; CHECK-NEXT: srli a3, a2, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.vp.store.nxv16f64.p0nxv16f64( %val, * %ptr, %m, i32 %evl) @@ -402,62 +400,58 @@ define void @vpstore_nxv17f64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv17f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a4, a3, 1 ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: mv a5, a2 ; CHECK-NEXT: bltu a2, a4, .LBB31_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: .LBB31_2: -; CHECK-NEXT: mv a7, a5 +; CHECK-NEXT: mv a6, a5 ; CHECK-NEXT: bltu a5, a3, .LBB31_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: mv a6, a3 ; CHECK-NEXT: .LBB31_4: -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: sub a0, a5, a3 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a7, vlenb +; CHECK-NEXT: slli a7, a7, 3 +; CHECK-NEXT: sub sp, sp, a7 +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vse64.v v8, (a1), v0.t -; CHECK-NEXT: bltu a5, a0, .LBB31_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a6, a0 -; CHECK-NEXT: .LBB31_6: -; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: sub a0, a5, a3 +; CHECK-NEXT: sltu a5, a5, a0 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a0, a5, a0 ; CHECK-NEXT: srli a5, a3, 3 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v24, a5 ; CHECK-NEXT: slli a5, a3, 3 ; CHECK-NEXT: add a5, a1, a5 -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: addi a6, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a6) # Unknown-size Folded Reload -; CHECK-NEXT: vse64.v v8, (a5), v0.t -; CHECK-NEXT: srli a5, a3, 2 -; CHECK-NEXT: sub a6, a2, a4 -; CHECK-NEXT: slli a4, a3, 4 -; CHECK-NEXT: bltu a2, a6, .LBB31_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv a0, a6 -; CHECK-NEXT: .LBB31_8: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a5 -; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: bltu a0, a3, .LBB31_10 -; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: sub a0, a2, a4 +; CHECK-NEXT: sltu a2, a2, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vse64.v v16, (a5), v0.t +; CHECK-NEXT: bltu a0, a3, .LBB31_6 +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a3 -; CHECK-NEXT: .LBB31_10: +; CHECK-NEXT: .LBB31_6: +; CHECK-NEXT: srli a2, a3, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: slli a2, a3, 4 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v16, (a1), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: vse64.v v8, (a1), v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -93,30 +93,27 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a3, a0 ; CHECK-NEXT: bltu a0, a2, .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: sub a1, a0, a2 ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: bltu a0, a1, .LBB6_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB6_4: -; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -128,30 +125,27 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a3, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: sub a1, a0, a2 ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: bltu a0, a1, .LBB7_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB7_4: -; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1153,30 +1153,27 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; RV32-LABEL: vpreduce_umax_nxv32i32: ; RV32: # %bb.0: -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: srli a2, a3, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a3, a2, 2 +; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v24, v0, a3 +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: sub a2, a1, a3 +; RV32-NEXT: sltu a4, a1, a2 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a2, a4, a2 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: slli a3, a3, 1 ; RV32-NEXT: vmv.s.x v25, a0 -; RV32-NEXT: mv a0, a1 ; RV32-NEXT: bltu a1, a3, .LBB67_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a3 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: li a4, 0 -; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a2 -; RV32-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV32-NEXT: vmv.x.s a2, v25 +; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: sub a0, a1, a3 -; RV32-NEXT: vmv.s.x v8, a2 -; RV32-NEXT: bltu a1, a0, .LBB67_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: .LBB67_4: -; RV32-NEXT: vsetvli zero, a4, e32, m8, tu, ma +; RV32-NEXT: vmv.s.x v8, a0 +; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t ; RV32-NEXT: vmv.x.s a0, v8 @@ -1186,30 +1183,27 @@ ; RV64: # %bb.0: ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: srli a2, a3, 2 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: slli a0, a3, 1 -; RV64-NEXT: srli a3, a4, 32 -; RV64-NEXT: mv a4, a1 -; RV64-NEXT: bltu a1, a0, .LBB67_2 +; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v24, v0, a2 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a2, a0, 32 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: sub a0, a1, a3 +; RV64-NEXT: sltu a4, a1, a0 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a0, a4, a0 +; RV64-NEXT: bltu a1, a3, .LBB67_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a4, a0 +; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: -; RV64-NEXT: li a5, 0 -; RV64-NEXT: vsetvli a6, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a2 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v25, a3 -; RV64-NEXT: vsetvli zero, a4, e32, m8, tu, ma +; RV64-NEXT: vmv.s.x v25, a2 +; RV64-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: vmv.x.s a2, v25 +; RV64-NEXT: vmv.x.s a1, v25 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: vmv.s.x v8, a2 -; RV64-NEXT: bltu a1, a0, .LBB67_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: .LBB67_4: -; RV64-NEXT: vsetvli zero, a5, e32, m8, tu, ma +; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t ; RV64-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -389,30 +389,27 @@ define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv128i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: csrr a2, vlenb ; 
CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vcpop.m a3, v8, v0.t +; CHECK-NEXT: snez a3, a3 ; CHECK-NEXT: bltu a1, a2, .LBB22_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vcpop.m a3, v11, v0.t -; CHECK-NEXT: snez a3, a3 -; CHECK-NEXT: sub a2, a1, a2 -; CHECK-NEXT: or a0, a3, a0 -; CHECK-NEXT: bltu a1, a2, .LBB22_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: .LBB22_4: -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vcpop.m a1, v8, v0.t +; CHECK-NEXT: vcpop.m a1, v11, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: or a0, a3, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -357,32 +357,30 @@ ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 3 -; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a1, a3, 3 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vl8re32.v v8, (a1) +; CHECK-NEXT: slli a1, a3, 1 ; CHECK-NEXT: sub a4, a2, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a2, a4, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a3 +; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB27_4: +; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -415,32 +413,30 @@ ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a2, a1, 3 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: vl8re32.v v8, (a2) -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: 
slli a2, a1, 1 -; CHECK-NEXT: sub a4, a1, a2 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a1, a4, .LBB28_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB28_2: -; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a4 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a1, a2, .LBB28_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a1, a2, .LBB28_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB28_4: +; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -703,31 +699,29 @@ ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 3 -; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vl8re64.v v8, (a4) -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a4, a2, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: bltu a2, a4, .LBB48_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: .LBB48_2: -; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re64.v v8, (a3) +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a1, .LBB48_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a2, a1, .LBB48_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB48_4: +; CHECK-NEXT: .LBB48_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll @@ -152,23 +152,21 @@ ; CHECK-LABEL: vsext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; 
CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vsext.vf4 v16, v10, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsext.vf4 v24, v8, v0.t @@ -183,22 +181,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v10 ; CHECK-NEXT: bltu a0, a1, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsext.vf4 v24, v8 -; CHECK-NEXT: bltu a0, a1, .LBB13_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB13_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v10 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.sext.nxv32i32.nxv32i8( %a, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %vl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -314,23 +314,21 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfncvt.f.x.w v12, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 @@ -351,23 +349,21 @@ ; CHECK-LABEL: vsitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; 
CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -381,21 +377,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.x.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.f.x.v v16, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.sitofp.nxv32f32.nxv32i32( %va, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.trunc.nxv2i7.nxv2i16(, , i32) @@ -158,24 +158,22 @@ ; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t @@ -217,25 +215,23 @@ ; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB15_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; 
CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB15_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB15_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB15_4: +; CHECK-NEXT: .LBB15_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t @@ -253,25 +249,23 @@ ; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB16_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB16_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t @@ -291,90 +285,86 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: mv a5, a2 -; CHECK-NEXT: bltu a2, a4, .LBB17_2 +; CHECK-NEXT: srli a3, a1, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a3, a0, a3 +; CHECK-NEXT: vl8re64.v v8, (a3) +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, a5 +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t +; CHECK-NEXT: bltu a4, a1, .LBB17_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB17_2: -; CHECK-NEXT: li a6, 
0 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-NEXT: sub a7, a5, a1 -; CHECK-NEXT: vslidedown.vx v0, v24, a3 -; CHECK-NEXT: bltu a5, a7, .LBB17_4 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v26, v1, a5 +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB17_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB17_4: -; CHECK-NEXT: srli a7, a1, 2 -; CHECK-NEXT: slli t0, a1, 3 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB17_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: .LBB17_6: -; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v1, v24, a7 -; CHECK-NEXT: add a7, a0, t0 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: sub a4, a2, a4 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB17_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv a6, a4 -; CHECK-NEXT: .LBB17_8: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vl8re64.v v16, (a7) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: sub a4, a6, a1 -; CHECK-NEXT: vslidedown.vx v0, v1, a3 -; CHECK-NEXT: bltu a6, a4, .LBB17_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: mv a2, a4 -; CHECK-NEXT: .LBB17_10: -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT: sub a0, a2, a1 +; CHECK-NEXT: sltu a3, a2, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t -; CHECK-NEXT: bltu a6, a1, .LBB17_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: mv a6, a1 -; CHECK-NEXT: .LBB17_12: -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v24, 0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB17_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB17_6: +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll @@ -314,23 +314,21 @@ ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfncvt.f.xu.w v12, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB25_4: +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 @@ -351,23 +349,21 @@ ; CHECK-LABEL: vuitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB26_4: +; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t @@ -381,21 +377,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16 ; CHECK-NEXT: bltu a0, a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: bltu a0, a1, .LBB27_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.f.xu.v v16, v16 ; CHECK-NEXT: ret %v = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl) ret <vscale x 32 x float> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll @@ -152,23 +152,21 @@ ; CHECK-LABEL: vzext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: li a2, 0 ; 
CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: bltu a0, a3, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vzext.vf4 v16, v10, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vzext.vf4 v24, v8, v0.t @@ -183,22 +181,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vzext.vf4 v16, v10 ; CHECK-NEXT: bltu a0, a1, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vzext.vf4 v24, v8 -; CHECK-NEXT: bltu a0, a1, .LBB13_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB13_4: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v10 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret %v = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %vl) ret <vscale x 32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll @@ -76,15 +76,19 @@ ; Compare if positive and select of constants where one constant is zero. 
define i32 @pos_sel_constants(i32 signext %a) { -; CHECK-LABEL: pos_sel_constants: -; CHECK: # %bb.0: -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: li a0, 5 -; CHECK-NEXT: bgez a1, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: ret +; RV32-LABEL: pos_sel_constants: +; RV32: # %bb.0: +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: andi a0, a0, 5 +; RV32-NEXT: ret +; +; RV64-LABEL: pos_sel_constants: +; RV64: # %bb.0: +; RV64-NEXT: slti a0, a0, 0 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: andi a0, a0, 5 +; RV64-NEXT: ret %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 5, i32 0 ret i32 %retval @@ -117,20 +121,16 @@ define i32 @pos_sel_variable_and_zero(i32 signext %a, i32 signext %b) { ; RV32I-LABEL: pos_sel_variable_and_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: bgez a0, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: pos_sel_variable_and_zero: ; RV64I: # %bb.0: -; RV64I-NEXT: bgez a0, .LBB6_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: slti a0, a0, 0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: pos_sel_variable_and_zero: @@ -154,18 +154,16 @@ define i32 @not_neg_sel_same_variable(i32 signext %a) { ; RV32I-LABEL: not_neg_sel_same_variable: ; RV32I: # %bb.0: -; RV32I-NEXT: bgtz a0, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sgtz a1, a0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: not_neg_sel_same_variable: ; RV64I: # %bb.0: -; RV64I-NEXT: bgtz a0, .LBB7_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: sgtz a1, a0 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: not_neg_sel_same_variable: @@ -187,19 +185,17 @@ ; RV32I-LABEL: sub_clamp_zero: ; RV32I: # %bb.0: ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: bgtz a0, .LBB8_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: sgtz a1, a0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sub_clamp_zero: ; RV64I: # %bb.0: ; RV64I-NEXT: subw a0, a0, a1 -; RV64I-NEXT: bgtz a0, .LBB8_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: sgtz a1, a0 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: sub_clamp_zero: @@ -222,12 +218,10 @@ define i8 @sel_shift_bool_i8(i1 %t) { ; CHECK-LABEL: sel_shift_bool_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: bnez a1, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: andi a0, a0, -128 ; CHECK-NEXT: ret %shl = select i1 %t, i8 128, i8 0 ret i8 %shl diff --git a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll --- a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll +++ b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll @@ -166,15 +166,18 @@ ; RV32I-NEXT: bltz a3, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a1, a0, a3 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: ret +; RV32I-NEXT: j .LBB9_3 ; 
RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: sll a1, a1, a2 -; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: srli a5, a0, 1 ; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: .LBB9_3: ; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: slti a2, a3, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sll_redundant_mask_zeros_i64: @@ -197,15 +200,18 @@ ; RV32I-NEXT: bltz a3, .LBB10_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a1, a3 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: ret +; RV32I-NEXT: j .LBB10_3 ; RV32I-NEXT: .LBB10_2: ; RV32I-NEXT: srl a0, a0, a2 -; RV32I-NEXT: slli a3, a1, 1 +; RV32I-NEXT: slli a5, a1, 1 ; RV32I-NEXT: xori a4, a4, 31 -; RV32I-NEXT: sll a3, a3, a4 -; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: sll a4, a5, a4 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: .LBB10_3: ; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slti a2, a3, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: srl_redundant_mask_zeros_i64: diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -17,15 +17,18 @@ ; RV32I-NEXT: bltz a3, .LBB0_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a1, a3 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: ret +; RV32I-NEXT: j .LBB0_3 ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: srl a0, a0, a2 -; RV32I-NEXT: xori a3, a2, 31 -; RV32I-NEXT: slli a4, a1, 1 -; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: xori a4, a2, 31 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sll a4, a5, a4 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: .LBB0_3: ; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slti a2, a3, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr64: @@ -105,15 +108,18 @@ ; RV32I-NEXT: bltz a3, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a1, a0, a3 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: ret +; RV32I-NEXT: j .LBB4_3 ; RV32I-NEXT: .LBB4_2: ; RV32I-NEXT: sll a1, a1, a2 -; RV32I-NEXT: xori a3, a2, 31 -; RV32I-NEXT: srli a4, a0, 1 -; RV32I-NEXT: srl a3, a4, a3 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: xori a4, a2, 31 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: slti a2, a3, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl64: @@ -145,115 +151,108 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: sll t0, a5, a6 -; RV32I-NEXT: bltz t1, .LBB6_2 +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a3, 12(a1) +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: li t1, 64 +; RV32I-NEXT: li a6, 32 +; RV32I-NEXT: sub t0, a6, a2 +; RV32I-NEXT: sll a7, a4, a5 +; RV32I-NEXT: bltz t0, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: mv t2, a7 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli t2, a5, 1 -; RV32I-NEXT: srl a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 +; RV32I-NEXT: sll 
a5, a3, a5 +; RV32I-NEXT: sub a6, t1, a2 +; RV32I-NEXT: xori a6, a6, 31 +; RV32I-NEXT: srli t2, a4, 1 +; RV32I-NEXT: srl a6, t2, a6 +; RV32I-NEXT: or t2, a5, a6 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw t5, 4(a1) +; RV32I-NEXT: lw t6, 4(a1) ; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB6_5 +; RV32I-NEXT: slti a5, a6, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: addi t4, a2, -64 +; RV32I-NEXT: addi t5, a2, -96 +; RV32I-NEXT: bltu a2, t1, .LBB6_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: srl t2, a3, t4 +; RV32I-NEXT: slti t3, t5, 0 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: and t3, t3, t2 +; RV32I-NEXT: mv t2, t6 +; RV32I-NEXT: bnez a2, .LBB6_6 +; RV32I-NEXT: j .LBB6_7 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz t4, .LBB6_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB6_8 -; RV32I-NEXT: j .LBB6_9 +; RV32I-NEXT: srl t3, t6, a2 +; RV32I-NEXT: and t3, a5, t3 +; RV32I-NEXT: or t3, t3, t2 +; RV32I-NEXT: mv t2, t6 +; RV32I-NEXT: beqz a2, .LBB6_7 +; RV32I-NEXT: .LBB6_6: +; RV32I-NEXT: mv t2, t3 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a7, a4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB6_9 -; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: xori t3, a2, 31 +; RV32I-NEXT: bltz a6, .LBB6_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: srl s0, t6, a6 +; RV32I-NEXT: slli t6, a3, 1 +; RV32I-NEXT: bgez t5, .LBB6_11 ; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: beqz a2, .LBB6_11 -; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: srl t5, a4, t4 +; RV32I-NEXT: xori t4, t4, 31 +; RV32I-NEXT: sll t4, t6, t4 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: bltu a2, t1, .LBB6_12 +; RV32I-NEXT: j .LBB6_13 +; RV32I-NEXT: .LBB6_10: +; RV32I-NEXT: srl s0, a1, a2 +; RV32I-NEXT: slli t6, t6, 1 +; RV32I-NEXT: sll t6, t6, t3 +; RV32I-NEXT: or s0, s0, t6 +; RV32I-NEXT: slli t6, a3, 1 +; RV32I-NEXT: bltz t5, .LBB6_9 ; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t2, a2, 31 -; RV32I-NEXT: bltz a6, .LBB6_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t5, t5, a6 -; RV32I-NEXT: bltz t1, .LBB6_14 -; RV32I-NEXT: j .LBB6_15 +; RV32I-NEXT: srl t4, a3, t5 +; RV32I-NEXT: bgeu a2, t1, .LBB6_13 +; RV32I-NEXT: .LBB6_12: +; RV32I-NEXT: slti t0, t0, 0 +; RV32I-NEXT: neg t0, t0 +; RV32I-NEXT: and a7, t0, a7 +; RV32I-NEXT: or t4, s0, a7 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl t6, a1, a2 -; RV32I-NEXT: slli t5, t5, 1 -; RV32I-NEXT: sll t5, t5, t2 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: bgez t1, .LBB6_15 -; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: or t5, t5, t0 +; RV32I-NEXT: bnez a2, .LBB6_16 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: bltz a6, .LBB6_17 ; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: slli t0, a4, 1 -; RV32I-NEXT: bltz t4, .LBB6_17 -; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: srl t1, a4, t4 -; RV32I-NEXT: bgeu a2, a3, .LBB6_18 -; RV32I-NEXT: j .LBB6_19 +; RV32I-NEXT: srl a4, a3, a6 +; RV32I-NEXT: j .LBB6_18 +; RV32I-NEXT: .LBB6_16: +; RV32I-NEXT: mv a1, t4 +; RV32I-NEXT: bgez a6, .LBB6_15 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: srl t1, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 -; RV32I-NEXT: bltu a2, a3, .LBB6_19 -; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: mv t5, t1 -; RV32I-NEXT: .LBB6_19: -; RV32I-NEXT: bnez a2, .LBB6_22 -; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB6_23 -; RV32I-NEXT: .LBB6_21: -; RV32I-NEXT: srl a5, a4, a6 -; RV32I-NEXT: bgeu 
a2, a3, .LBB6_24 -; RV32I-NEXT: j .LBB6_25 -; RV32I-NEXT: .LBB6_22: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bgez a6, .LBB6_21 -; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t0, t0, t2 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltu a2, a3, .LBB6_25 -; RV32I-NEXT: .LBB6_24: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: .LBB6_25: -; RV32I-NEXT: bltz a6, .LBB6_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB6_28 -; RV32I-NEXT: j .LBB6_29 -; RV32I-NEXT: .LBB6_27: ; RV32I-NEXT: srl a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB6_29 -; RV32I-NEXT: .LBB6_28: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: .LBB6_29: -; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sll a6, t6, t3 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: .LBB6_18: +; RV32I-NEXT: sltiu a6, a2, 64 +; RV32I-NEXT: neg a6, a6 +; RV32I-NEXT: and a4, a6, a4 +; RV32I-NEXT: srl a2, a3, a2 +; RV32I-NEXT: and a2, a5, a2 +; RV32I-NEXT: and a2, a6, a2 +; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a7, 4(a0) +; RV32I-NEXT: sw t2, 4(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -262,15 +261,18 @@ ; RV64I-NEXT: bltz a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: srl a0, a1, a3 -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: ret +; RV64I-NEXT: j .LBB6_3 ; RV64I-NEXT: .LBB6_2: ; RV64I-NEXT: srl a0, a0, a2 -; RV64I-NEXT: xori a3, a2, 63 -; RV64I-NEXT: slli a4, a1, 1 -; RV64I-NEXT: sll a3, a4, a3 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: xori a4, a2, 63 +; RV64I-NEXT: slli a5, a1, 1 +; RV64I-NEXT: sll a4, a5, a4 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: slti a2, a3, 0 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: ret %1 = lshr i128 %a, %b ret i128 %1 @@ -282,115 +284,113 @@ ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 +; RV32I-NEXT: neg a5, a2 ; RV32I-NEXT: li a3, 64 ; RV32I-NEXT: li a7, 32 ; RV32I-NEXT: sub t2, a7, a2 -; RV32I-NEXT: sll t1, a5, a6 +; RV32I-NEXT: sll t1, a6, a5 ; RV32I-NEXT: bltz t2, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: mv a7, t1 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, a6 +; RV32I-NEXT: sll a5, a4, a5 ; RV32I-NEXT: sub a7, a3, a2 ; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli t0, a5, 1 +; RV32I-NEXT: srli t0, a6, 1 ; RV32I-NEXT: srl a7, t0, a7 -; RV32I-NEXT: or t4, a6, a7 +; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: lw t6, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB7_5 +; RV32I-NEXT: addi t3, a2, -64 +; RV32I-NEXT: addi t4, a2, -96 +; RV32I-NEXT: srai a5, a4, 31 +; RV32I-NEXT: bltz t4, .LBB7_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t6, a2 -; RV32I-NEXT: or t4, t4, a7 +; RV32I-NEXT: mv t5, a5 +; RV32I-NEXT: j .LBB7_6 ; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi t5, a2, -96 -; RV32I-NEXT: srai a7, a4, 31 -; RV32I-NEXT: bltz t5, .LBB7_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: sra t5, a4, t3 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: lw t6, 4(a1) +; RV32I-NEXT: addi t0, a2, -32 ; RV32I-NEXT: bgeu a2, a3, .LBB7_8 -; RV32I-NEXT: j .LBB7_9 -; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: sra t0, a4, t3 -; RV32I-NEXT: bltu a2, a3, 
.LBB7_9 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: slti t5, t0, 0 +; RV32I-NEXT: srl s0, t6, a2 +; RV32I-NEXT: neg t5, t5 +; RV32I-NEXT: and t5, t5, s0 +; RV32I-NEXT: or t5, t5, a7 ; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: mv t0, t6 -; RV32I-NEXT: beqz a2, .LBB7_11 -; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv t0, t4 -; RV32I-NEXT: .LBB7_11: +; RV32I-NEXT: mv a7, t6 +; RV32I-NEXT: beqz a2, .LBB7_10 +; RV32I-NEXT: # %bb.9: +; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: .LBB7_10: ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t4, a2, 31 -; RV32I-NEXT: bltz a6, .LBB7_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t6, t6, a6 -; RV32I-NEXT: bltz t2, .LBB7_14 -; RV32I-NEXT: j .LBB7_15 +; RV32I-NEXT: xori t5, a2, 31 +; RV32I-NEXT: bltz t0, .LBB7_13 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: srl s0, t6, t0 +; RV32I-NEXT: slli t6, a4, 1 +; RV32I-NEXT: bgez t4, .LBB7_14 +; RV32I-NEXT: .LBB7_12: +; RV32I-NEXT: srl t4, a6, t3 +; RV32I-NEXT: xori t3, t3, 31 +; RV32I-NEXT: sll t3, t6, t3 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB7_15 +; RV32I-NEXT: j .LBB7_16 ; RV32I-NEXT: .LBB7_13: ; RV32I-NEXT: srl s0, a1, a2 ; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t4 -; RV32I-NEXT: or t6, s0, t6 -; RV32I-NEXT: bgez t2, .LBB7_15 +; RV32I-NEXT: sll t6, t6, t5 +; RV32I-NEXT: or s0, s0, t6 +; RV32I-NEXT: slli t6, a4, 1 +; RV32I-NEXT: bltz t4, .LBB7_12 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: or t6, t6, t1 +; RV32I-NEXT: sra t3, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB7_16 ; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: slli t1, a4, 1 -; RV32I-NEXT: bltz t5, .LBB7_17 -; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sra t2, a4, t5 -; RV32I-NEXT: bgeu a2, a3, .LBB7_18 -; RV32I-NEXT: j .LBB7_19 -; RV32I-NEXT: .LBB7_17: -; RV32I-NEXT: srl t2, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t1, t3 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: bltu a2, a3, .LBB7_19 +; RV32I-NEXT: slti t2, t2, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: and t1, t2, t1 +; RV32I-NEXT: or t3, s0, t1 +; RV32I-NEXT: .LBB7_16: +; RV32I-NEXT: bnez a2, .LBB7_19 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: bltz t0, .LBB7_20 ; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: mv t6, t2 +; RV32I-NEXT: sra a6, a4, t0 +; RV32I-NEXT: bgeu a2, a3, .LBB7_21 +; RV32I-NEXT: j .LBB7_22 ; RV32I-NEXT: .LBB7_19: -; RV32I-NEXT: bnez a2, .LBB7_22 -; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB7_23 +; RV32I-NEXT: mv a1, t3 +; RV32I-NEXT: bgez t0, .LBB7_18 +; RV32I-NEXT: .LBB7_20: +; RV32I-NEXT: srl a6, a6, a2 +; RV32I-NEXT: sll t1, t6, t5 +; RV32I-NEXT: or a6, a6, t1 +; RV32I-NEXT: bltu a2, a3, .LBB7_22 ; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: sra a5, a4, a6 -; RV32I-NEXT: bgeu a2, a3, .LBB7_24 -; RV32I-NEXT: j .LBB7_25 +; RV32I-NEXT: mv a6, a5 ; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: mv a1, t6 -; RV32I-NEXT: bgez a6, .LBB7_21 -; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t1, t1, t4 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: bltu a2, a3, .LBB7_25 +; RV32I-NEXT: bltz t0, .LBB7_24 +; RV32I-NEXT: # %bb.23: +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: bgeu a2, a3, .LBB7_25 +; RV32I-NEXT: j .LBB7_26 ; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: mv a5, a7 -; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: bltz a6, .LBB7_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a4, a7 -; RV32I-NEXT: bgeu a2, a3, .LBB7_28 -; RV32I-NEXT: j .LBB7_29 -; RV32I-NEXT: .LBB7_27: ; RV32I-NEXT: sra a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB7_29 -; RV32I-NEXT: .LBB7_28: -; RV32I-NEXT: mv a4, a7 -; RV32I-NEXT: .LBB7_29: +; RV32I-NEXT: bltu a2, a3, .LBB7_26 
+; RV32I-NEXT: .LBB7_25: +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: .LBB7_26: ; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a6, 8(a0) ; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw t0, 4(a0) +; RV32I-NEXT: sw a7, 4(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -418,115 +418,107 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: srl t0, a5, a6 -; RV32I-NEXT: bltz t1, .LBB8_2 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: li t0, 64 +; RV32I-NEXT: li a6, 32 +; RV32I-NEXT: sub a7, a6, a2 +; RV32I-NEXT: srl a6, a3, a5 +; RV32I-NEXT: bltz a7, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: mv t1, a6 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: slli t2, a5, 1 -; RV32I-NEXT: sll a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 +; RV32I-NEXT: srl a5, a4, a5 +; RV32I-NEXT: sub t1, t0, a2 +; RV32I-NEXT: xori t1, t1, 31 +; RV32I-NEXT: slli t2, a3, 1 +; RV32I-NEXT: sll t1, t2, t1 +; RV32I-NEXT: or t1, a5, t1 ; RV32I-NEXT: .LBB8_3: ; RV32I-NEXT: lw t5, 8(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB8_5 +; RV32I-NEXT: addi a5, a2, -32 +; RV32I-NEXT: slti t2, a5, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: addi t4, a2, -64 +; RV32I-NEXT: addi t6, a2, -96 +; RV32I-NEXT: bltu a2, t0, .LBB8_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: sll t1, a4, t4 +; RV32I-NEXT: slti t3, t6, 0 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: and t3, t3, t1 +; RV32I-NEXT: mv t1, t5 +; RV32I-NEXT: bnez a2, .LBB8_6 +; RV32I-NEXT: j .LBB8_7 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz t4, .LBB8_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB8_8 -; RV32I-NEXT: j .LBB8_9 +; RV32I-NEXT: sll t3, t5, a2 +; RV32I-NEXT: and t3, t2, t3 +; RV32I-NEXT: or t3, t3, t1 +; RV32I-NEXT: mv t1, t5 +; RV32I-NEXT: beqz a2, .LBB8_7 +; RV32I-NEXT: .LBB8_6: +; RV32I-NEXT: mv t1, t3 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: sll a7, a4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB8_9 -; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: xori t3, a2, 31 +; RV32I-NEXT: bltz a5, .LBB8_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: sll s0, t5, a5 +; RV32I-NEXT: srli t5, a4, 1 +; RV32I-NEXT: bgez t6, .LBB8_11 ; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: beqz a2, .LBB8_11 -; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: sll t6, a3, t4 +; RV32I-NEXT: xori t4, t4, 31 +; RV32I-NEXT: srl t4, t5, t4 +; RV32I-NEXT: or t4, t6, t4 +; RV32I-NEXT: bltu a2, t0, .LBB8_12 +; RV32I-NEXT: j .LBB8_13 +; RV32I-NEXT: .LBB8_10: +; RV32I-NEXT: sll s0, a1, a2 +; RV32I-NEXT: srli t5, t5, 1 +; RV32I-NEXT: srl t5, t5, t3 +; RV32I-NEXT: or s0, s0, t5 +; RV32I-NEXT: srli t5, a4, 1 +; RV32I-NEXT: bltz t6, .LBB8_9 ; RV32I-NEXT: .LBB8_11: -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori t2, a2, 31 -; RV32I-NEXT: bltz a6, .LBB8_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sll t5, t5, a6 -; RV32I-NEXT: bltz t1, .LBB8_14 -; 
RV32I-NEXT: j .LBB8_15 +; RV32I-NEXT: sll t4, a4, t6 +; RV32I-NEXT: bgeu a2, t0, .LBB8_13 +; RV32I-NEXT: .LBB8_12: +; RV32I-NEXT: slti a7, a7, 0 +; RV32I-NEXT: neg a7, a7 +; RV32I-NEXT: and a6, a7, a6 +; RV32I-NEXT: or t4, s0, a6 ; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll t6, a1, a2 -; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t5, t5, t2 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: bgez t1, .LBB8_15 -; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: or t5, t5, t0 +; RV32I-NEXT: beqz a2, .LBB8_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: mv a1, t4 ; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: srli t0, a4, 1 -; RV32I-NEXT: bltz t4, .LBB8_17 +; RV32I-NEXT: sll a6, a4, a2 +; RV32I-NEXT: and a6, t2, a6 +; RV32I-NEXT: sltiu a7, a2, 64 +; RV32I-NEXT: neg a7, a7 +; RV32I-NEXT: and a6, a7, a6 +; RV32I-NEXT: bltz a5, .LBB8_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll t1, a4, t4 -; RV32I-NEXT: bgeu a2, a3, .LBB8_18 -; RV32I-NEXT: j .LBB8_19 +; RV32I-NEXT: sll a2, a4, a5 +; RV32I-NEXT: j .LBB8_18 ; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: sll t1, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: srl t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 -; RV32I-NEXT: bltu a2, a3, .LBB8_19 +; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: srl a3, t5, t3 +; RV32I-NEXT: or a2, a2, a3 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: mv t5, t1 -; RV32I-NEXT: .LBB8_19: -; RV32I-NEXT: bnez a2, .LBB8_22 -; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB8_23 -; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: sll a5, a4, a6 -; RV32I-NEXT: bgeu a2, a3, .LBB8_24 -; RV32I-NEXT: j .LBB8_25 -; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bgez a6, .LBB8_21 -; RV32I-NEXT: .LBB8_23: -; RV32I-NEXT: sll a5, a5, a2 -; RV32I-NEXT: srl t0, t0, t2 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltu a2, a3, .LBB8_25 -; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: bltz a6, .LBB8_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB8_28 -; RV32I-NEXT: j .LBB8_29 -; RV32I-NEXT: .LBB8_27: -; RV32I-NEXT: sll a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB8_29 -; RV32I-NEXT: .LBB8_28: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: .LBB8_29: -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: and a2, a7, a2 +; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a6, 0(a0) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a7, 8(a0) +; RV32I-NEXT: sw t1, 8(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: @@ -535,15 +527,18 @@ ; RV64I-NEXT: bltz a3, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sll a1, a0, a3 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: ret +; RV64I-NEXT: j .LBB8_3 ; RV64I-NEXT: .LBB8_2: ; RV64I-NEXT: sll a1, a1, a2 -; RV64I-NEXT: xori a3, a2, 63 -; RV64I-NEXT: srli a4, a0, 1 -; RV64I-NEXT: srl a3, a4, a3 -; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: xori a4, a2, 63 +; RV64I-NEXT: srli a5, a0, 1 +; RV64I-NEXT: srl a4, a5, a4 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: .LBB8_3: ; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: slti a2, a3, 0 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: and a0, a2, a0 ; RV64I-NEXT: ret %1 = shl i128 %a, %b ret i128 %1 diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll --- a/llvm/test/CodeGen/RISCV/usub_sat.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat.ll @@ -13,24 +13,18 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind { ; RV32I-LABEL: func: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: 
bltu a2, a1, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: subw a1, a0, a1 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB0_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func: @@ -57,29 +51,23 @@ ; RV32I-NEXT: sub a2, a0, a2 ; RV32I-NEXT: beq a3, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a3 +; RV32I-NEXT: sltu a0, a1, a3 ; RV32I-NEXT: j .LBB1_3 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a4, .LBB1_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: .LBB1_5: +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a0, a1, a2 +; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func2: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: sub a1, a0, a1 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB1_2: +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func2: @@ -90,18 +78,15 @@ ; RV32IZbb-NEXT: sub a2, a0, a2 ; RV32IZbb-NEXT: beq a3, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: sltu a4, a1, a3 +; RV32IZbb-NEXT: sltu a0, a1, a3 ; RV32IZbb-NEXT: j .LBB1_3 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: sltu a4, a0, a2 +; RV32IZbb-NEXT: sltu a0, a0, a2 ; RV32IZbb-NEXT: .LBB1_3: -; RV32IZbb-NEXT: li a0, 0 -; RV32IZbb-NEXT: li a1, 0 -; RV32IZbb-NEXT: bnez a4, .LBB1_5 -; RV32IZbb-NEXT: # %bb.4: -; RV32IZbb-NEXT: mv a0, a2 -; RV32IZbb-NEXT: mv a1, a3 -; RV32IZbb-NEXT: .LBB1_5: +; RV32IZbb-NEXT: snez a0, a0 +; RV32IZbb-NEXT: addi a1, a0, -1 +; RV32IZbb-NEXT: and a0, a1, a2 +; RV32IZbb-NEXT: and a1, a1, a3 ; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func2: @@ -116,24 +101,18 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind { ; RV32I-LABEL: func16: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB2_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func16: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: sub a1, a0, a1 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB2_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func16: @@ -154,24 +133,18 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { ; RV32I-LABEL: func8: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB3_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func8: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 ; 
RV64I-NEXT: sub a1, a0, a1 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB3_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func8: @@ -192,24 +165,18 @@ define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind { ; RV32I-LABEL: func3: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB4_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func3: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: sub a1, a0, a1 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB4_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func3: diff --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll @@ -13,26 +13,21 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; RV32I-LABEL: func32: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mul a0, a1, a2 -; RV32I-NEXT: sub a1, a3, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a3, a1, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func32: ; RV64I: # %bb.0: ; RV64I-NEXT: mulw a1, a1, a2 ; RV64I-NEXT: subw a1, a0, a1 -; RV64I-NEXT: sext.w a2, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB0_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func32: @@ -63,29 +58,23 @@ ; RV32I-NEXT: sub a3, a0, a4 ; RV32I-NEXT: beq a2, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: sltu a0, a1, a2 ; RV32I-NEXT: j .LBB1_3 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sltu a0, a0, a3 ; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: bnez a4, .LBB1_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: mv a1, a2 -; RV32I-NEXT: .LBB1_5: +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a0, a1, a3 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: sub a2, a0, a2 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a1, a2, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a2 -; RV64I-NEXT: .LBB1_2: +; RV64I-NEXT: sub a1, a0, a2 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func64: @@ -96,18 +85,15 @@ ; RV32IZbb-NEXT: sub a3, a0, a4 ; RV32IZbb-NEXT: beq a2, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: sltu a4, a1, a2 +; RV32IZbb-NEXT: sltu a0, a1, a2 ; RV32IZbb-NEXT: j .LBB1_3 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: sltu a4, a0, a3 +; RV32IZbb-NEXT: sltu a0, a0, a3 ; RV32IZbb-NEXT: 
.LBB1_3: -; RV32IZbb-NEXT: li a0, 0 -; RV32IZbb-NEXT: li a1, 0 -; RV32IZbb-NEXT: bnez a4, .LBB1_5 -; RV32IZbb-NEXT: # %bb.4: -; RV32IZbb-NEXT: mv a0, a3 -; RV32IZbb-NEXT: mv a1, a2 -; RV32IZbb-NEXT: .LBB1_5: +; RV32IZbb-NEXT: snez a0, a0 +; RV32IZbb-NEXT: addi a1, a0, -1 +; RV32IZbb-NEXT: and a0, a1, a3 +; RV32IZbb-NEXT: and a1, a1, a2 ; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func64: @@ -125,30 +111,26 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lui a3, 16 ; RV32I-NEXT: addi a3, a3, -1 -; RV32I-NEXT: and a4, a0, a3 -; RV32I-NEXT: mul a0, a1, a2 ; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: sub a1, a4, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a4, a1, .LBB2_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func16: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: addiw a3, a3, -1 -; RV64I-NEXT: and a4, a0, a3 -; RV64I-NEXT: mul a0, a1, a2 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: sub a1, a4, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a4, a1, .LBB2_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: sub a1, a0, a1 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func16: @@ -176,28 +158,24 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV32I-LABEL: func8: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a3, a0, 255 -; RV32I-NEXT: mul a0, a1, a2 ; RV32I-NEXT: andi a0, a0, 255 -; RV32I-NEXT: sub a1, a3, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a3, a1, .LBB3_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: andi a1, a1, 255 +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func8: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a3, a0, 255 -; RV64I-NEXT: mulw a0, a1, a2 ; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: sub a1, a3, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a3, a1, .LBB3_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: mulw a1, a1, a2 +; RV64I-NEXT: andi a1, a1, 255 +; RV64I-NEXT: sub a1, a0, a1 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func8: @@ -225,28 +203,24 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; RV32I-LABEL: func4: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a3, a0, 15 -; RV32I-NEXT: mul a0, a1, a2 ; RV32I-NEXT: andi a0, a0, 15 -; RV32I-NEXT: sub a1, a3, a0 -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a3, a1, .LBB4_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: andi a1, a1, 15 +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func4: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a3, a0, 15 -; RV64I-NEXT: mulw a0, a1, a2 ; RV64I-NEXT: andi a0, a0, 15 -; RV64I-NEXT: sub a1, a3, a0 -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a3, a1, .LBB4_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: mulw a1, a1, a2 +; RV64I-NEXT: andi a1, a1, 15 +; RV64I-NEXT: sub 
a1, a0, a1 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IZbb-LABEL: func4: diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -12,71 +12,55 @@ ; RV32-LABEL: vec3_setcc_crash: ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: slli a2, a0, 8 -; RV32-NEXT: slli a3, a0, 24 -; RV32-NEXT: slli a4, a0, 16 -; RV32-NEXT: srai a5, a4, 24 +; RV32-NEXT: srli a2, a0, 16 +; RV32-NEXT: slli a3, a0, 8 ; RV32-NEXT: srai a3, a3, 24 -; RV32-NEXT: bgtz a5, .LBB0_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a5, 0 -; RV32-NEXT: j .LBB0_3 -; RV32-NEXT: .LBB0_2: -; RV32-NEXT: srli a5, a4, 24 -; RV32-NEXT: .LBB0_3: -; RV32-NEXT: srai a4, a2, 24 -; RV32-NEXT: slli a2, a5, 8 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bgtz a3, .LBB0_5 -; RV32-NEXT: # %bb.4: -; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB0_5: -; RV32-NEXT: andi a3, a5, 255 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bgtz a4, .LBB0_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB0_8 -; RV32-NEXT: .LBB0_7: -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: .LBB0_8: -; RV32-NEXT: sb a0, 2(a1) -; RV32-NEXT: sh a2, 0(a1) +; RV32-NEXT: slli a4, a0, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: srli a5, a0, 8 +; RV32-NEXT: slli a6, a0, 16 +; RV32-NEXT: srai a6, a6, 24 +; RV32-NEXT: sgtz a6, a6 +; RV32-NEXT: neg a6, a6 +; RV32-NEXT: and a5, a6, a5 +; RV32-NEXT: slli a5, a5, 8 +; RV32-NEXT: sgtz a4, a4 +; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a0, a4, a0 +; RV32-NEXT: andi a0, a0, 255 +; RV32-NEXT: or a0, a0, a5 +; RV32-NEXT: sgtz a3, a3 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sb a2, 2(a1) +; RV32-NEXT: sh a0, 0(a1) ; RV32-NEXT: ret ; ; RV64-LABEL: vec3_setcc_crash: ; RV64: # %bb.0: ; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: slli a2, a0, 40 -; RV64-NEXT: slli a3, a0, 56 -; RV64-NEXT: slli a4, a0, 48 -; RV64-NEXT: srai a5, a4, 56 +; RV64-NEXT: srli a2, a0, 16 +; RV64-NEXT: slli a3, a0, 40 ; RV64-NEXT: srai a3, a3, 56 -; RV64-NEXT: bgtz a5, .LBB0_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a5, 0 -; RV64-NEXT: j .LBB0_3 -; RV64-NEXT: .LBB0_2: -; RV64-NEXT: srli a5, a4, 56 -; RV64-NEXT: .LBB0_3: -; RV64-NEXT: srai a4, a2, 56 -; RV64-NEXT: slli a2, a5, 8 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: bgtz a3, .LBB0_5 -; RV64-NEXT: # %bb.4: -; RV64-NEXT: li a5, 0 -; RV64-NEXT: .LBB0_5: -; RV64-NEXT: andi a3, a5, 255 -; RV64-NEXT: or a2, a3, a2 -; RV64-NEXT: bgtz a4, .LBB0_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB0_8 -; RV64-NEXT: .LBB0_7: -; RV64-NEXT: srliw a0, a0, 16 -; RV64-NEXT: .LBB0_8: -; RV64-NEXT: sb a0, 2(a1) -; RV64-NEXT: sh a2, 0(a1) +; RV64-NEXT: slli a4, a0, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: srli a5, a0, 8 +; RV64-NEXT: slli a6, a0, 48 +; RV64-NEXT: srai a6, a6, 56 +; RV64-NEXT: sgtz a6, a6 +; RV64-NEXT: neg a6, a6 +; RV64-NEXT: and a5, a6, a5 +; RV64-NEXT: slli a5, a5, 8 +; RV64-NEXT: sgtz a4, a4 +; RV64-NEXT: neg a4, a4 +; RV64-NEXT: and a0, a4, a0 +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: or a0, a0, a5 +; RV64-NEXT: sgtz a3, a3 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sb a2, 2(a1) +; RV64-NEXT: sh a0, 0(a1) ; RV64-NEXT: ret %a = load <3 x i8>, <3 x i8>* %in %cmp = icmp sgt <3 x i8> %a, zeroinitializer