diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1302,30 +1302,41 @@
                                    node:$falsev), [{}], IntCCtoRISCVCC>;
 
-let usesCustomInserter = 1 in
-class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
-    : Pseudo<(outs valty:$dst),
-             (ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
-              valty:$truev, valty:$falsev),
-             [(set valty:$dst,
-               (riscv_selectcc_frag:$imm cmpty:$lhs, cmpty:$rhs, cond,
-                valty:$truev, valty:$falsev))]>;
-
-def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
+multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
+  let usesCustomInserter = 1 in
+  def _Using_CC_GPR : Pseudo<(outs valty:$dst),
+                             (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                              valty:$truev, valty:$falsev),
+                             [(set valty:$dst,
+                               (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs, cond,
+                                valty:$truev, valty:$falsev))]>;
+  // Explicitly select 0 in the condition to X0. The register coalescer doesn't
+  // always do it.
+  def : Pat<(riscv_selectcc_frag:$cc GPR:$lhs, 0, cond, valty:$truev,
+             valty:$falsev),
+            (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, X0,
+             (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
+}
+
+defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
 
 /// Branches and jumps
 
 // Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
-class BccPat<CondCode Cond, RVInstB Inst>
-    : Pat<(riscv_brcc GPR:$rs1, GPR:$rs2, Cond, bb:$imm12),
-          (Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
-
-def : BccPat<SETEQ, BEQ>;
-def : BccPat<SETNE, BNE>;
-def : BccPat<SETLT, BLT>;
-def : BccPat<SETGE, BGE>;
-def : BccPat<SETULT, BLTU>;
-def : BccPat<SETUGE, BGEU>;
+multiclass BccPat<CondCode Cond, RVInstB Inst> {
+  def : Pat<(riscv_brcc GPR:$rs1, GPR:$rs2, Cond, bb:$imm12),
+            (Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
+  // Explicitly select 0 to X0. The register coalescer doesn't always do it.
+  def : Pat<(riscv_brcc GPR:$rs1, 0, Cond, bb:$imm12),
+            (Inst GPR:$rs1, X0, simm13_lsb0:$imm12)>;
+}
+
+defm : BccPat<SETEQ, BEQ>;
+defm : BccPat<SETNE, BNE>;
+defm : BccPat<SETLT, BLT>;
+defm : BccPat<SETGE, BGE>;
+defm : BccPat<SETULT, BLTU>;
+defm : BccPat<SETUGE, BGEU>;
 
 let isBarrier = 1, isBranch = 1, isTerminator = 1 in
 def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -334,7 +334,7 @@
 def : PatSetCC;
 def : PatSetCC;
 
-def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>;
+defm Select_FPR64 : SelectCC_GPR_rrirr<FPR64>;
 
 /// Loads
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -564,7 +564,7 @@
 def : PatSetCC;
 def : PatSetCC;
 
-def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>;
+defm Select_FPR32 : SelectCC_GPR_rrirr<FPR32>;
 
 /// Loads
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -336,7 +336,7 @@
 def : PatSetCC;
 def : PatSetCC;
 
-def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
+defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16>;
 } // Predicates = [HasStdExtZfh]
 
 let Predicates = [HasStdExtZfhOrZfhmin] in {
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -109,42 +109,44 @@
 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
-; 
RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a0, 269824 +; RV32I-NEXT: addi a3, a0, -1 +; RV32I-NEXT: lui a2, 1047552 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __gtdf2@plt +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lui a3, 794112 -; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: blt s3, s0, .LBB3_2 +; RV32I-NEXT: bltz s4, .LBB3_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: .LBB3_2: # %start -; RV32I-NEXT: lui a0, 269824 -; RV32I-NEXT: addi a3, a0, -1 -; RV32I-NEXT: lui a2, 1047552 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: bge s0, a0, .LBB3_4 +; RV32I-NEXT: bge s2, s3, .LBB3_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: addi s4, s5, -1 +; RV32I-NEXT: addi s5, a1, -1 ; RV32I-NEXT: .LBB3_4: # %start -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s0, .LBB3_6 +; RV32I-NEXT: bnez a0, .LBB3_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s0, s4 +; RV32I-NEXT: mv s2, s5 ; RV32I-NEXT: .LBB3_6: # %start -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -163,21 +165,19 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a0, -497 ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: lui s3, 524288 -; RV64I-NEXT: bltz s2, .LBB3_2 +; RV64I-NEXT: lui s2, 524288 +; RV64I-NEXT: bltz s1, .LBB3_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB3_2: # %start ; RV64I-NEXT: li a0, 527 ; RV64I-NEXT: slli a0, a0, 31 @@ -185,24 +185,24 @@ ; RV64I-NEXT: slli a1, a0, 22 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: bge s1, a0, .LBB3_4 +; RV64I-NEXT: blez a0, .LBB3_4 ; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: addiw s3, s4, -1 +; RV64I-NEXT: addiw s2, s3, -1 ; RV64I-NEXT: .LBB3_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB3_6 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB3_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: .LBB3_6: # %start -; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; 
RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ -610,16 +610,15 @@ ; ; RV32I-LABEL: fcvt_l_d_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -48 -; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278016 @@ -641,7 +640,7 @@ ; RV32I-NEXT: call __fixdfdi@plt ; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: blt s6, s2, .LBB12_2 +; RV32I-NEXT: bltz s6, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB12_2: # %start @@ -656,51 +655,51 @@ ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: mv s4, s2 -; RV32I-NEXT: bne a0, s2, .LBB12_6 +; RV32I-NEXT: bnez a0, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start ; RV32I-NEXT: mv s4, s6 ; RV32I-NEXT: .LBB12_6: # %start +; RV32I-NEXT: li a2, -1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call __gtdf2@plt +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lui a3, 802304 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: lui s7, 524288 +; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: lui s6, 524288 -; RV32I-NEXT: blt a0, s2, .LBB12_8 +; RV32I-NEXT: bltz a0, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start ; RV32I-NEXT: mv s6, s5 ; RV32I-NEXT: .LBB12_8: # %start -; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a3, s3 -; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: bge s2, a0, .LBB12_10 +; RV32I-NEXT: bge s2, s3, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: addi s6, s7, -1 +; RV32I-NEXT: addi s6, a1, -1 ; RV32I-NEXT: .LBB12_10: # %start ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s2, .LBB12_12 +; RV32I-NEXT: bnez a0, .LBB12_12 ; RV32I-NEXT: # %bb.11: # %start ; RV32I-NEXT: mv s2, s6 ; RV32I-NEXT: .LBB12_12: # %start ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_l_d_sat: @@ -711,47 +710,45 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a0, -481 ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: li s4, -1 -; RV64I-NEXT: bltz s3, .LBB12_2 +; RV64I-NEXT: li s3, -1 +; RV64I-NEXT: bltz s2, .LBB12_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: j .LBB12_3 ; RV64I-NEXT: .LBB12_2: -; RV64I-NEXT: slli s2, s4, 63 +; RV64I-NEXT: slli s1, s3, 63 ; RV64I-NEXT: .LBB12_3: # %start ; RV64I-NEXT: li a0, 543 ; RV64I-NEXT: slli a0, a0, 53 ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: bge s1, a0, .LBB12_5 +; RV64I-NEXT: blez a0, .LBB12_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srli s2, s4, 1 +; RV64I-NEXT: srli s1, s3, 1 ; RV64I-NEXT: .LBB12_5: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB12_7 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB12_7 ; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv s1, s2 -; RV64I-NEXT: .LBB12_7: # %start ; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: .LBB12_7: # %start ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ -1415,40 +1412,43 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: lui a3, 790016 +; RV32I-NEXT: lui a0, 265728 +; RV32I-NEXT: addi a3, a0, -64 ; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: call __gedf2@plt +; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: lui a3, 790016 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __fixdfsi@plt -; RV32I-NEXT: lui s4, 1048568 -; RV32I-NEXT: blt s3, s0, .LBB26_2 -; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: .LBB26_2: # %start -; RV32I-NEXT: lui a0, 265728 -; RV32I-NEXT: addi a3, a0, -64 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: bge s0, a0, .LBB26_4 +; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: lui s5, 1048568 +; RV32I-NEXT: bltz s4, .LBB26_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: .LBB26_2: # %start +; RV32I-NEXT: bge s0, s3, .LBB26_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 -; RV32I-NEXT: 
addi s4, a0, -1 +; RV32I-NEXT: addi s5, a0, -1 ; RV32I-NEXT: .LBB26_4: # %start ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s0, .LBB26_6 +; RV32I-NEXT: bnez a0, .LBB26_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s0, s4 +; RV32I-NEXT: mv s0, s5 ; RV32I-NEXT: .LBB26_6: # %start ; RV32I-NEXT: slli a0, s0, 16 ; RV32I-NEXT: srai a0, a0, 16 @@ -1458,17 +1458,17 @@ ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_w_s_sat_i16: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a0, -505 ; RV64I-NEXT: slli a1, a0, 53 @@ -1477,37 +1477,36 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: lui s3, 1048568 +; RV64I-NEXT: lui s2, 1048568 ; RV64I-NEXT: bltz s1, .LBB26_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB26_2: # %start ; RV64I-NEXT: lui a0, 4152 ; RV64I-NEXT: addiw a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 38 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: bge s2, a0, .LBB26_4 +; RV64I-NEXT: blez a0, .LBB26_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 -; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: addiw s2, a0, -1 ; RV64I-NEXT: .LBB26_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s2, .LBB26_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB26_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: .LBB26_6: # %start -; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f64(double %a) @@ -1731,37 +1730,38 @@ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: lui a3, 787968 +; RV32I-NEXT: lui a3, 263676 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: call __gedf2@plt +; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: lui a3, 787968 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __fixdfsi@plt -; RV32I-NEXT: li s4, -128 -; RV32I-NEXT: blt s3, s0, 
.LBB30_2 -; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: .LBB30_2: # %start -; RV32I-NEXT: lui a3, 263676 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: li s3, 127 -; RV32I-NEXT: blt s0, a0, .LBB30_4 +; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: li a1, -128 +; RV32I-NEXT: bltz s4, .LBB30_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB30_2: # %start +; RV32I-NEXT: li s4, 127 +; RV32I-NEXT: blt s0, s3, .LBB30_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s3, s4 +; RV32I-NEXT: mv s4, a1 ; RV32I-NEXT: .LBB30_4: # %start ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s0, .LBB30_6 +; RV32I-NEXT: bnez a0, .LBB30_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s0, s3 +; RV32I-NEXT: mv s0, s4 ; RV32I-NEXT: .LBB30_6: # %start ; RV32I-NEXT: slli a0, s0, 24 ; RV32I-NEXT: srai a0, a0, 24 @@ -1776,12 +1776,11 @@ ; ; RV64I-LABEL: fcvt_w_s_sat_i8: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a0, -509 ; RV64I-NEXT: slli a1, a0, 53 @@ -1790,36 +1789,35 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: li s3, -128 +; RV64I-NEXT: li s2, -128 ; RV64I-NEXT: bltz s1, .LBB30_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB30_2: # %start ; RV64I-NEXT: lui a0, 65919 ; RV64I-NEXT: slli a1, a0, 34 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: li s1, 127 -; RV64I-NEXT: blt s2, a0, .LBB30_4 +; RV64I-NEXT: bgtz a0, .LBB30_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s1, s2 ; RV64I-NEXT: .LBB30_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s2, .LBB30_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB30_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s1 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: .LBB30_6: # %start -; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i8 @llvm.fptosi.sat.i8.f64(double %a) diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- 
a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -53,42 +53,40 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 847872 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: lui s3, 524288 -; RV32I-NEXT: bltz s2, .LBB1_2 +; RV32I-NEXT: lui s2, 524288 +; RV32I-NEXT: bltz s1, .LBB1_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB1_2: # %start ; RV32I-NEXT: lui a0, 323584 ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB1_4 +; RV32I-NEXT: blez a0, .LBB1_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: addi s3, s4, -1 +; RV32I-NEXT: addi s2, s3, -1 ; RV32I-NEXT: .LBB1_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB1_6 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: bnez a1, .LBB1_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: .LBB1_6: # %start -; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -100,42 +98,40 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 847872 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: lui s3, 524288 -; RV64I-NEXT: bltz s2, .LBB1_2 +; RV64I-NEXT: lui s2, 524288 +; RV64I-NEXT: bltz s1, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB1_2: # %start ; RV64I-NEXT: lui a0, 323584 ; RV64I-NEXT: addiw a1, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB1_4 +; RV64I-NEXT: blez a0, .LBB1_4 ; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: addiw s3, s4, -1 +; RV64I-NEXT: addiw s2, s3, -1 ; RV64I-NEXT: .LBB1_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB1_6 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB1_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: .LBB1_6: # %start -; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ -629,7 +625,7 @@ ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call 
__unordsf2@plt ; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: bne a0, s1, .LBB12_6 +; RV32I-NEXT: bnez a0, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start ; RV32I-NEXT: mv s3, s6 ; RV32I-NEXT: .LBB12_6: # %start @@ -638,7 +634,7 @@ ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: lui s6, 524288 ; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: blt a0, s1, .LBB12_8 +; RV32I-NEXT: bltz a0, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start ; RV32I-NEXT: mv s5, s2 ; RV32I-NEXT: .LBB12_8: # %start @@ -652,7 +648,7 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB12_12 +; RV32I-NEXT: bnez a0, .LBB12_12 ; RV32I-NEXT: # %bb.11: # %start ; RV32I-NEXT: mv s1, s5 ; RV32I-NEXT: .LBB12_12: # %start @@ -677,44 +673,42 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 913408 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: li s4, -1 -; RV64I-NEXT: bltz s3, .LBB12_2 +; RV64I-NEXT: li s3, -1 +; RV64I-NEXT: bltz s2, .LBB12_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: j .LBB12_3 ; RV64I-NEXT: .LBB12_2: -; RV64I-NEXT: slli s2, s4, 63 +; RV64I-NEXT: slli s1, s3, 63 ; RV64I-NEXT: .LBB12_3: # %start ; RV64I-NEXT: lui a0, 389120 ; RV64I-NEXT: addiw a1, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB12_5 +; RV64I-NEXT: blez a0, .LBB12_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srli s2, s4, 1 +; RV64I-NEXT: srli s1, s3, 1 ; RV64I-NEXT: .LBB12_5: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB12_7 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB12_7 ; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv s1, s2 -; RV64I-NEXT: .LBB12_7: # %start ; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: .LBB12_7: # %start ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ -1270,94 +1264,90 @@ ; ; RV32I-LABEL: fcvt_w_s_sat_i16: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: lui s3, 1048568 +; RV32I-NEXT: lui s2, 1048568 ; RV32I-NEXT: bltz s1, .LBB24_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB24_2: # %start ; RV32I-NEXT: lui a0, 290816 ; 
RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s2, a0, .LBB24_4 +; RV32I-NEXT: blez a0, .LBB24_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: .LBB24_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s2, .LBB24_6 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: bnez a0, .LBB24_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: .LBB24_6: # %start -; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_w_s_sat_i16: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: lui s3, 1048568 +; RV64I-NEXT: lui s2, 1048568 ; RV64I-NEXT: bltz s1, .LBB24_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB24_2: # %start ; RV64I-NEXT: lui a0, 290816 ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s2, a0, .LBB24_4 +; RV64I-NEXT: blez a0, .LBB24_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 -; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: addiw s2, a0, -1 ; RV64I-NEXT: .LBB24_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s2, .LBB24_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB24_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: .LBB24_6: # %start -; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) @@ -1562,92 +1552,88 @@ ; ; 
RV32I-LABEL: fcvt_w_s_sat_i8: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 798720 ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: li s3, -128 +; RV32I-NEXT: li s2, -128 ; RV32I-NEXT: bltz s1, .LBB28_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB28_2: # %start ; RV32I-NEXT: lui a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: li s1, 127 -; RV32I-NEXT: blt s2, a0, .LBB28_4 +; RV32I-NEXT: bgtz a0, .LBB28_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s1, s2 ; RV32I-NEXT: .LBB28_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s2, .LBB28_6 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: bnez a0, .LBB28_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s1 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: .LBB28_6: # %start -; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_w_s_sat_i8: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 798720 ; RV64I-NEXT: call __gesf2@plt ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: li s3, -128 +; RV64I-NEXT: li s2, -128 ; RV64I-NEXT: bltz s1, .LBB28_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB28_2: # %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: li s1, 127 -; RV64I-NEXT: blt s2, a0, .LBB28_4 +; RV64I-NEXT: bgtz a0, .LBB28_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s1, s2 ; RV64I-NEXT: .LBB28_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne 
a0, s2, .LBB28_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB28_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s1 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: .LBB28_6: # %start -; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i8 @llvm.fptosi.sat.i8.f32(float %a) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -2530,37 +2530,34 @@ ; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgez a1, .LBB35_7 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bgez a1, .LBB35_6 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bnez a1, .LBB35_8 +; RV32-NEXT: bnez a1, .LBB35_7 ; RV32-NEXT: .LBB35_2: # %entry -; RV32-NEXT: bgez a1, .LBB35_9 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: blez a2, .LBB35_8 ; RV32-NEXT: .LBB35_3: # %entry -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: blez a1, .LBB35_10 +; RV32-NEXT: beqz a2, .LBB35_5 ; RV32-NEXT: .LBB35_4: # %entry -; RV32-NEXT: beqz a1, .LBB35_6 +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB35_5: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB35_6: # %entry ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret -; RV32-NEXT: .LBB35_7: # %entry -; RV32-NEXT: li a2, -1 +; RV32-NEXT: .LBB35_6: # %entry +; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a3, -1 ; RV32-NEXT: beqz a1, .LBB35_2 +; RV32-NEXT: .LBB35_7: # %entry +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bgtz a2, .LBB35_3 ; RV32-NEXT: .LBB35_8: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltz a1, .LBB35_3 -; RV32-NEXT: .LBB35_9: # %entry ; RV32-NEXT: li a1, 0 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgtz a1, .LBB35_4 -; RV32-NEXT: .LBB35_10: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bnez a1, .LBB35_5 -; RV32-NEXT: j .LBB35_6 +; RV32-NEXT: bnez a2, .LBB35_4 +; RV32-NEXT: j .LBB35_5 ; ; RV64-LABEL: ustest_f16i32_mm: ; RV64: # %bb.0: # %entry @@ -3117,112 +3114,114 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a5, 8(sp) -; RV32IF-NEXT: lw a3, 20(sp) -; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a7, 8(sp) +; RV32IF-NEXT: lw a5, 12(sp) +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: addi a0, a4, -1 ; RV32IF-NEXT: li a2, -1 -; RV32IF-NEXT: mv a7, a5 -; RV32IF-NEXT: bltz a3, .LBB45_2 +; RV32IF-NEXT: mv a1, a7 +; RV32IF-NEXT: bltu a5, a0, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: li a7, -1 +; RV32IF-NEXT: li a1, -1 ; RV32IF-NEXT: .LBB45_2: # %entry -; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: addi a6, a4, -1 -; RV32IF-NEXT: mv t0, a5 -; RV32IF-NEXT: bgeu a1, a6, .LBB45_19 +; RV32IF-NEXT: lw a6, 20(sp) +; RV32IF-NEXT: mv a3, a7 +; RV32IF-NEXT: bne a5, a0, .LBB45_19 ; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: bne a1, 
a6, .LBB45_20 +; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: bgez a6, .LBB45_20 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: or t0, a0, a3 +; RV32IF-NEXT: or t0, a1, a6 ; RV32IF-NEXT: bnez t0, .LBB45_21 ; RV32IF-NEXT: .LBB45_5: # %entry -; RV32IF-NEXT: mv a7, a1 -; RV32IF-NEXT: bgez a3, .LBB45_22 +; RV32IF-NEXT: mv a7, a5 +; RV32IF-NEXT: bgez a6, .LBB45_22 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: bgeu a1, a6, .LBB45_23 +; RV32IF-NEXT: bgeu a5, a0, .LBB45_23 ; RV32IF-NEXT: .LBB45_7: # %entry ; RV32IF-NEXT: bnez t0, .LBB45_24 ; RV32IF-NEXT: .LBB45_8: # %entry -; RV32IF-NEXT: li a6, 0 -; RV32IF-NEXT: bnez a3, .LBB45_25 +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: bnez a6, .LBB45_25 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: bgez a3, .LBB45_26 +; RV32IF-NEXT: bgez a6, .LBB45_26 ; RV32IF-NEXT: .LBB45_10: # %entry -; RV32IF-NEXT: mv a7, a5 -; RV32IF-NEXT: bgeu a4, a1, .LBB45_27 +; RV32IF-NEXT: mv t0, a5 +; RV32IF-NEXT: bltz a6, .LBB45_27 ; RV32IF-NEXT: .LBB45_11: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: bne a1, a4, .LBB45_28 +; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: bgeu a4, a5, .LBB45_28 ; RV32IF-NEXT: .LBB45_12: # %entry -; RV32IF-NEXT: bltz a3, .LBB45_29 +; RV32IF-NEXT: and a7, a0, a6 +; RV32IF-NEXT: bne a7, a2, .LBB45_29 ; RV32IF-NEXT: .LBB45_13: # %entry -; RV32IF-NEXT: and a6, a6, a3 -; RV32IF-NEXT: bne a6, a2, .LBB45_30 +; RV32IF-NEXT: mv t0, a3 +; RV32IF-NEXT: bgeu a4, a5, .LBB45_30 ; RV32IF-NEXT: .LBB45_14: # %entry -; RV32IF-NEXT: mv a5, a1 -; RV32IF-NEXT: bltz a3, .LBB45_31 +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bne a5, a4, .LBB45_31 ; RV32IF-NEXT: .LBB45_15: # %entry -; RV32IF-NEXT: bgeu a4, a1, .LBB45_32 +; RV32IF-NEXT: bltz a6, .LBB45_32 ; RV32IF-NEXT: .LBB45_16: # %entry -; RV32IF-NEXT: beq a6, a2, .LBB45_18 +; RV32IF-NEXT: beq a7, a2, .LBB45_18 ; RV32IF-NEXT: .LBB45_17: # %entry -; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: mv a0, a3 ; RV32IF-NEXT: .LBB45_18: # %entry ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB45_19: # %entry -; RV32IF-NEXT: li t0, -1 -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: beq a1, a6, .LBB45_4 +; RV32IF-NEXT: mv a3, a1 +; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: bltz a6, .LBB45_4 ; RV32IF-NEXT: .LBB45_20: # %entry -; RV32IF-NEXT: mv a5, t0 -; RV32IF-NEXT: or t0, a0, a3 +; RV32IF-NEXT: li a7, -1 +; RV32IF-NEXT: or t0, a1, a6 ; RV32IF-NEXT: beqz t0, .LBB45_5 ; RV32IF-NEXT: .LBB45_21: # %entry -; RV32IF-NEXT: mv a5, a7 -; RV32IF-NEXT: mv a7, a1 -; RV32IF-NEXT: bltz a3, .LBB45_6 +; RV32IF-NEXT: mv a3, a7 +; RV32IF-NEXT: mv a7, a5 +; RV32IF-NEXT: bltz a6, .LBB45_6 ; RV32IF-NEXT: .LBB45_22: # %entry -; RV32IF-NEXT: mv a7, a6 -; RV32IF-NEXT: bltu a1, a6, .LBB45_7 +; RV32IF-NEXT: mv a7, a0 +; RV32IF-NEXT: bltu a5, a0, .LBB45_7 ; RV32IF-NEXT: .LBB45_23: # %entry -; RV32IF-NEXT: mv a1, a6 +; RV32IF-NEXT: mv a5, a0 ; RV32IF-NEXT: beqz t0, .LBB45_8 ; RV32IF-NEXT: .LBB45_24: # %entry -; RV32IF-NEXT: mv a1, a7 -; RV32IF-NEXT: li a6, 0 -; RV32IF-NEXT: beqz a3, .LBB45_9 +; RV32IF-NEXT: mv a5, a7 +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: beqz a6, .LBB45_9 ; RV32IF-NEXT: .LBB45_25: # %entry -; RV32IF-NEXT: srai a6, a3, 31 -; RV32IF-NEXT: and a6, a6, a0 -; RV32IF-NEXT: bltz a3, .LBB45_10 +; RV32IF-NEXT: srai a0, a6, 31 +; RV32IF-NEXT: and a0, a0, a1 +; RV32IF-NEXT: bltz a6, .LBB45_10 ; RV32IF-NEXT: .LBB45_26: # %entry -; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: mv a7, a5 -; RV32IF-NEXT: bltu a4, a1, .LBB45_11 +; RV32IF-NEXT: li a6, 0 +; RV32IF-NEXT: mv t0, a5 +; RV32IF-NEXT: 
bgez a6, .LBB45_11 ; RV32IF-NEXT: .LBB45_27: # %entry -; RV32IF-NEXT: li a7, 0 -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: beq a1, a4, .LBB45_12 +; RV32IF-NEXT: lui t0, 524288 +; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: bltu a4, a5, .LBB45_12 ; RV32IF-NEXT: .LBB45_28: # %entry -; RV32IF-NEXT: mv a0, a7 -; RV32IF-NEXT: bgez a3, .LBB45_13 +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: and a7, a0, a6 +; RV32IF-NEXT: beq a7, a2, .LBB45_13 ; RV32IF-NEXT: .LBB45_29: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: and a6, a6, a3 -; RV32IF-NEXT: beq a6, a2, .LBB45_14 +; RV32IF-NEXT: mv a1, t0 +; RV32IF-NEXT: mv t0, a3 +; RV32IF-NEXT: bltu a4, a5, .LBB45_14 ; RV32IF-NEXT: .LBB45_30: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: mv a5, a1 -; RV32IF-NEXT: bgez a3, .LBB45_15 +; RV32IF-NEXT: li t0, 0 +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: beq a5, a4, .LBB45_15 ; RV32IF-NEXT: .LBB45_31: # %entry -; RV32IF-NEXT: lui a5, 524288 -; RV32IF-NEXT: bltu a4, a1, .LBB45_16 +; RV32IF-NEXT: mv a0, t0 +; RV32IF-NEXT: bgez a6, .LBB45_16 ; RV32IF-NEXT: .LBB45_32: # %entry -; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: bne a6, a2, .LBB45_17 +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: bne a7, a2, .LBB45_17 ; RV32IF-NEXT: j .LBB45_18 ; ; RV64IF-LABEL: stest_f64i64_mm: @@ -3284,112 +3283,114 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a5, 8(sp) -; RV32IFD-NEXT: lw a3, 20(sp) -; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a7, 8(sp) +; RV32IFD-NEXT: lw a5, 12(sp) +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: addi a0, a4, -1 ; RV32IFD-NEXT: li a2, -1 -; RV32IFD-NEXT: mv a7, a5 -; RV32IFD-NEXT: bltz a3, .LBB45_2 +; RV32IFD-NEXT: mv a1, a7 +; RV32IFD-NEXT: bltu a5, a0, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: li a7, -1 +; RV32IFD-NEXT: li a1, -1 ; RV32IFD-NEXT: .LBB45_2: # %entry -; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: addi a6, a4, -1 -; RV32IFD-NEXT: mv t0, a5 -; RV32IFD-NEXT: bgeu a1, a6, .LBB45_19 +; RV32IFD-NEXT: lw a6, 20(sp) +; RV32IFD-NEXT: mv a3, a7 +; RV32IFD-NEXT: bne a5, a0, .LBB45_19 ; RV32IFD-NEXT: # %bb.3: # %entry -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: bne a1, a6, .LBB45_20 +; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: bgez a6, .LBB45_20 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: or t0, a0, a3 +; RV32IFD-NEXT: or t0, a1, a6 ; RV32IFD-NEXT: bnez t0, .LBB45_21 ; RV32IFD-NEXT: .LBB45_5: # %entry -; RV32IFD-NEXT: mv a7, a1 -; RV32IFD-NEXT: bgez a3, .LBB45_22 +; RV32IFD-NEXT: mv a7, a5 +; RV32IFD-NEXT: bgez a6, .LBB45_22 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: bgeu a1, a6, .LBB45_23 +; RV32IFD-NEXT: bgeu a5, a0, .LBB45_23 ; RV32IFD-NEXT: .LBB45_7: # %entry ; RV32IFD-NEXT: bnez t0, .LBB45_24 ; RV32IFD-NEXT: .LBB45_8: # %entry -; RV32IFD-NEXT: li a6, 0 -; RV32IFD-NEXT: bnez a3, .LBB45_25 +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: bnez a6, .LBB45_25 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: bgez a3, .LBB45_26 +; RV32IFD-NEXT: bgez a6, .LBB45_26 ; RV32IFD-NEXT: .LBB45_10: # %entry -; RV32IFD-NEXT: mv a7, a5 -; RV32IFD-NEXT: bgeu a4, a1, .LBB45_27 +; RV32IFD-NEXT: mv t0, a5 +; RV32IFD-NEXT: bltz a6, .LBB45_27 ; RV32IFD-NEXT: .LBB45_11: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: bne a1, a4, .LBB45_28 +; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: bgeu a4, a5, .LBB45_28 ; RV32IFD-NEXT: .LBB45_12: # %entry -; RV32IFD-NEXT: bltz a3, .LBB45_29 +; RV32IFD-NEXT: and a7, a0, a6 +; RV32IFD-NEXT: bne a7, a2, .LBB45_29 ; RV32IFD-NEXT: .LBB45_13: # %entry -; 
RV32IFD-NEXT: and a6, a6, a3 -; RV32IFD-NEXT: bne a6, a2, .LBB45_30 +; RV32IFD-NEXT: mv t0, a3 +; RV32IFD-NEXT: bgeu a4, a5, .LBB45_30 ; RV32IFD-NEXT: .LBB45_14: # %entry -; RV32IFD-NEXT: mv a5, a1 -; RV32IFD-NEXT: bltz a3, .LBB45_31 +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bne a5, a4, .LBB45_31 ; RV32IFD-NEXT: .LBB45_15: # %entry -; RV32IFD-NEXT: bgeu a4, a1, .LBB45_32 +; RV32IFD-NEXT: bltz a6, .LBB45_32 ; RV32IFD-NEXT: .LBB45_16: # %entry -; RV32IFD-NEXT: beq a6, a2, .LBB45_18 +; RV32IFD-NEXT: beq a7, a2, .LBB45_18 ; RV32IFD-NEXT: .LBB45_17: # %entry -; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: mv a0, a3 ; RV32IFD-NEXT: .LBB45_18: # %entry ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; RV32IFD-NEXT: .LBB45_19: # %entry -; RV32IFD-NEXT: li t0, -1 -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: beq a1, a6, .LBB45_4 +; RV32IFD-NEXT: mv a3, a1 +; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: bltz a6, .LBB45_4 ; RV32IFD-NEXT: .LBB45_20: # %entry -; RV32IFD-NEXT: mv a5, t0 -; RV32IFD-NEXT: or t0, a0, a3 +; RV32IFD-NEXT: li a7, -1 +; RV32IFD-NEXT: or t0, a1, a6 ; RV32IFD-NEXT: beqz t0, .LBB45_5 ; RV32IFD-NEXT: .LBB45_21: # %entry -; RV32IFD-NEXT: mv a5, a7 -; RV32IFD-NEXT: mv a7, a1 -; RV32IFD-NEXT: bltz a3, .LBB45_6 +; RV32IFD-NEXT: mv a3, a7 +; RV32IFD-NEXT: mv a7, a5 +; RV32IFD-NEXT: bltz a6, .LBB45_6 ; RV32IFD-NEXT: .LBB45_22: # %entry -; RV32IFD-NEXT: mv a7, a6 -; RV32IFD-NEXT: bltu a1, a6, .LBB45_7 +; RV32IFD-NEXT: mv a7, a0 +; RV32IFD-NEXT: bltu a5, a0, .LBB45_7 ; RV32IFD-NEXT: .LBB45_23: # %entry -; RV32IFD-NEXT: mv a1, a6 +; RV32IFD-NEXT: mv a5, a0 ; RV32IFD-NEXT: beqz t0, .LBB45_8 ; RV32IFD-NEXT: .LBB45_24: # %entry -; RV32IFD-NEXT: mv a1, a7 -; RV32IFD-NEXT: li a6, 0 -; RV32IFD-NEXT: beqz a3, .LBB45_9 +; RV32IFD-NEXT: mv a5, a7 +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: beqz a6, .LBB45_9 ; RV32IFD-NEXT: .LBB45_25: # %entry -; RV32IFD-NEXT: srai a6, a3, 31 -; RV32IFD-NEXT: and a6, a6, a0 -; RV32IFD-NEXT: bltz a3, .LBB45_10 +; RV32IFD-NEXT: srai a0, a6, 31 +; RV32IFD-NEXT: and a0, a0, a1 +; RV32IFD-NEXT: bltz a6, .LBB45_10 ; RV32IFD-NEXT: .LBB45_26: # %entry -; RV32IFD-NEXT: li a3, 0 -; RV32IFD-NEXT: mv a7, a5 -; RV32IFD-NEXT: bltu a4, a1, .LBB45_11 +; RV32IFD-NEXT: li a6, 0 +; RV32IFD-NEXT: mv t0, a5 +; RV32IFD-NEXT: bgez a6, .LBB45_11 ; RV32IFD-NEXT: .LBB45_27: # %entry -; RV32IFD-NEXT: li a7, 0 -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: beq a1, a4, .LBB45_12 +; RV32IFD-NEXT: lui t0, 524288 +; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: bltu a4, a5, .LBB45_12 ; RV32IFD-NEXT: .LBB45_28: # %entry -; RV32IFD-NEXT: mv a0, a7 -; RV32IFD-NEXT: bgez a3, .LBB45_13 +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: and a7, a0, a6 +; RV32IFD-NEXT: beq a7, a2, .LBB45_13 ; RV32IFD-NEXT: .LBB45_29: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: and a6, a6, a3 -; RV32IFD-NEXT: beq a6, a2, .LBB45_14 +; RV32IFD-NEXT: mv a1, t0 +; RV32IFD-NEXT: mv t0, a3 +; RV32IFD-NEXT: bltu a4, a5, .LBB45_14 ; RV32IFD-NEXT: .LBB45_30: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: mv a5, a1 -; RV32IFD-NEXT: bgez a3, .LBB45_15 +; RV32IFD-NEXT: li t0, 0 +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: beq a5, a4, .LBB45_15 ; RV32IFD-NEXT: .LBB45_31: # %entry -; RV32IFD-NEXT: lui a5, 524288 -; RV32IFD-NEXT: bltu a4, a1, .LBB45_16 +; RV32IFD-NEXT: mv a0, t0 +; RV32IFD-NEXT: bgez a6, .LBB45_16 ; RV32IFD-NEXT: .LBB45_32: # %entry -; RV32IFD-NEXT: lui a1, 524288 -; RV32IFD-NEXT: bne a6, a2, .LBB45_17 +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: bne a7, a2, 
.LBB45_17 ; RV32IFD-NEXT: j .LBB45_18 ; ; RV64IFD-LABEL: stest_f64i64_mm: @@ -3425,31 +3426,31 @@ ; RV32IF-NEXT: beqz a0, .LBB46_3 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: beq a2, a1, .LBB46_4 +; RV32IF-NEXT: beqz a2, .LBB46_4 ; RV32IF-NEXT: .LBB46_2: ; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: j .LBB46_5 ; RV32IF-NEXT: .LBB46_3: ; RV32IF-NEXT: seqz a2, a3 -; RV32IF-NEXT: bne a2, a1, .LBB46_2 +; RV32IF-NEXT: bnez a2, .LBB46_2 ; RV32IF-NEXT: .LBB46_4: # %entry ; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB46_5: # %entry ; RV32IF-NEXT: xori a3, a3, 1 ; RV32IF-NEXT: or a3, a3, a0 ; RV32IF-NEXT: mv a0, a1 -; RV32IF-NEXT: beq a3, a1, .LBB46_7 +; RV32IF-NEXT: beqz a3, .LBB46_7 ; RV32IF-NEXT: # %bb.6: # %entry ; RV32IF-NEXT: mv a0, a4 ; RV32IF-NEXT: .LBB46_7: # %entry -; RV32IF-NEXT: bne a2, a1, .LBB46_9 +; RV32IF-NEXT: bnez a2, .LBB46_9 ; RV32IF-NEXT: # %bb.8: # %entry ; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: bne a3, a1, .LBB46_10 +; RV32IF-NEXT: bnez a3, .LBB46_10 ; RV32IF-NEXT: j .LBB46_11 ; RV32IF-NEXT: .LBB46_9: ; RV32IF-NEXT: lw a2, 12(sp) -; RV32IF-NEXT: beq a3, a1, .LBB46_11 +; RV32IF-NEXT: beqz a3, .LBB46_11 ; RV32IF-NEXT: .LBB46_10: # %entry ; RV32IF-NEXT: mv a1, a2 ; RV32IF-NEXT: .LBB46_11: # %entry @@ -3493,31 +3494,31 @@ ; RV32IFD-NEXT: beqz a0, .LBB46_3 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: beq a2, a1, .LBB46_4 +; RV32IFD-NEXT: beqz a2, .LBB46_4 ; RV32IFD-NEXT: .LBB46_2: ; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: j .LBB46_5 ; RV32IFD-NEXT: .LBB46_3: ; RV32IFD-NEXT: seqz a2, a3 -; RV32IFD-NEXT: bne a2, a1, .LBB46_2 +; RV32IFD-NEXT: bnez a2, .LBB46_2 ; RV32IFD-NEXT: .LBB46_4: # %entry ; RV32IFD-NEXT: mv a4, a1 ; RV32IFD-NEXT: .LBB46_5: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 ; RV32IFD-NEXT: or a3, a3, a0 ; RV32IFD-NEXT: mv a0, a1 -; RV32IFD-NEXT: beq a3, a1, .LBB46_7 +; RV32IFD-NEXT: beqz a3, .LBB46_7 ; RV32IFD-NEXT: # %bb.6: # %entry ; RV32IFD-NEXT: mv a0, a4 ; RV32IFD-NEXT: .LBB46_7: # %entry -; RV32IFD-NEXT: bne a2, a1, .LBB46_9 +; RV32IFD-NEXT: bnez a2, .LBB46_9 ; RV32IFD-NEXT: # %bb.8: # %entry ; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: bne a3, a1, .LBB46_10 +; RV32IFD-NEXT: bnez a3, .LBB46_10 ; RV32IFD-NEXT: j .LBB46_11 ; RV32IFD-NEXT: .LBB46_9: ; RV32IFD-NEXT: lw a2, 12(sp) -; RV32IFD-NEXT: beq a3, a1, .LBB46_11 +; RV32IFD-NEXT: beqz a3, .LBB46_11 ; RV32IFD-NEXT: .LBB46_10: # %entry ; RV32IFD-NEXT: mv a1, a2 ; RV32IFD-NEXT: .LBB46_11: # %entry @@ -3542,91 +3543,86 @@ ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a0, 16(sp) ; RV32IF-NEXT: lw a2, 20(sp) -; RV32IF-NEXT: li a1, 1 -; RV32IF-NEXT: mv a4, a0 -; RV32IF-NEXT: bgez a2, .LBB47_5 +; RV32IF-NEXT: lw a3, 16(sp) +; RV32IF-NEXT: beqz a2, .LBB47_3 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bgeu a0, a1, .LBB47_6 -; RV32IF-NEXT: .LBB47_2: # %entry -; RV32IF-NEXT: beqz a2, .LBB47_7 -; RV32IF-NEXT: .LBB47_3: # %entry -; RV32IF-NEXT: slti a1, a2, 0 -; RV32IF-NEXT: mv a3, a4 -; RV32IF-NEXT: beqz a1, .LBB47_8 -; RV32IF-NEXT: .LBB47_4: -; RV32IF-NEXT: lw a5, 8(sp) -; RV32IF-NEXT: j .LBB47_9 +; RV32IF-NEXT: slti a0, a2, 0 +; RV32IF-NEXT: beqz a0, .LBB47_4 +; RV32IF-NEXT: .LBB47_2: +; RV32IF-NEXT: lw a5, 12(sp) +; RV32IF-NEXT: j .LBB47_5 +; RV32IF-NEXT: .LBB47_3: +; RV32IF-NEXT: seqz a0, a3 +; RV32IF-NEXT: bnez a0, .LBB47_2 +; RV32IF-NEXT: .LBB47_4: # %entry +; RV32IF-NEXT: li a5, 0 ; RV32IF-NEXT: .LBB47_5: # %entry -; RV32IF-NEXT: li a4, 1 -; RV32IF-NEXT: mv a3, a0 -; 
RV32IF-NEXT: bltu a0, a1, .LBB47_2 -; RV32IF-NEXT: .LBB47_6: # %entry -; RV32IF-NEXT: li a3, 1 -; RV32IF-NEXT: bnez a2, .LBB47_3 -; RV32IF-NEXT: .LBB47_7: -; RV32IF-NEXT: seqz a1, a0 -; RV32IF-NEXT: bnez a1, .LBB47_4 -; RV32IF-NEXT: .LBB47_8: # %entry +; RV32IF-NEXT: xori a1, a3, 1 +; RV32IF-NEXT: or a4, a1, a2 +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB47_7 +; RV32IF-NEXT: # %bb.6: # %entry +; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: .LBB47_7: # %entry +; RV32IF-NEXT: bnez a0, .LBB47_9 +; RV32IF-NEXT: # %bb.8: # %entry ; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: .LBB47_9: # %entry -; RV32IF-NEXT: xori a0, a0, 1 -; RV32IF-NEXT: or a0, a0, a2 -; RV32IF-NEXT: li a4, 0 -; RV32IF-NEXT: beqz a0, .LBB47_11 -; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: mv a4, a5 +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: bnez a4, .LBB47_10 +; RV32IF-NEXT: j .LBB47_11 +; RV32IF-NEXT: .LBB47_9: +; RV32IF-NEXT: lw a5, 8(sp) +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: beqz a4, .LBB47_11 +; RV32IF-NEXT: .LBB47_10: # %entry +; RV32IF-NEXT: mv a0, a5 ; RV32IF-NEXT: .LBB47_11: # %entry -; RV32IF-NEXT: bnez a1, .LBB47_13 +; RV32IF-NEXT: li a5, 1 +; RV32IF-NEXT: mv a4, a3 +; RV32IF-NEXT: bgez a2, .LBB47_17 ; RV32IF-NEXT: # %bb.12: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: bnez a0, .LBB47_14 -; RV32IF-NEXT: j .LBB47_15 -; RV32IF-NEXT: .LBB47_13: -; RV32IF-NEXT: lw a5, 12(sp) -; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a0, .LBB47_15 +; RV32IF-NEXT: bgeu a3, a5, .LBB47_18 +; RV32IF-NEXT: .LBB47_13: # %entry +; RV32IF-NEXT: bnez a2, .LBB47_19 ; RV32IF-NEXT: .LBB47_14: # %entry -; RV32IF-NEXT: mv a1, a5 -; RV32IF-NEXT: .LBB47_15: # %entry ; RV32IF-NEXT: bgez a2, .LBB47_20 -; RV32IF-NEXT: # %bb.16: # %entry -; RV32IF-NEXT: mv a5, a4 -; RV32IF-NEXT: beqz a1, .LBB47_21 +; RV32IF-NEXT: .LBB47_15: # %entry +; RV32IF-NEXT: beqz a2, .LBB47_21 +; RV32IF-NEXT: .LBB47_16: # %entry +; RV32IF-NEXT: sgtz a4, a2 +; RV32IF-NEXT: mv a5, a0 +; RV32IF-NEXT: beqz a4, .LBB47_22 +; RV32IF-NEXT: j .LBB47_23 ; RV32IF-NEXT: .LBB47_17: # %entry -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: bnez a1, .LBB47_22 +; RV32IF-NEXT: li a4, 1 +; RV32IF-NEXT: bltu a3, a5, .LBB47_13 ; RV32IF-NEXT: .LBB47_18: # %entry -; RV32IF-NEXT: beqz a2, .LBB47_23 +; RV32IF-NEXT: li a3, 1 +; RV32IF-NEXT: beqz a2, .LBB47_14 ; RV32IF-NEXT: .LBB47_19: # %entry -; RV32IF-NEXT: sgtz a5, a2 -; RV32IF-NEXT: beqz a5, .LBB47_24 -; RV32IF-NEXT: j .LBB47_25 +; RV32IF-NEXT: mv a3, a4 +; RV32IF-NEXT: bltz a2, .LBB47_15 ; RV32IF-NEXT: .LBB47_20: # %entry ; RV32IF-NEXT: li a2, 0 -; RV32IF-NEXT: mv a5, a4 -; RV32IF-NEXT: bnez a1, .LBB47_17 -; RV32IF-NEXT: .LBB47_21: # %entry -; RV32IF-NEXT: li a5, 0 -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: beqz a1, .LBB47_18 +; RV32IF-NEXT: bnez a2, .LBB47_16 +; RV32IF-NEXT: .LBB47_21: +; RV32IF-NEXT: snez a4, a3 +; RV32IF-NEXT: mv a5, a0 +; RV32IF-NEXT: bnez a4, .LBB47_23 ; RV32IF-NEXT: .LBB47_22: # %entry -; RV32IF-NEXT: mv a0, a5 -; RV32IF-NEXT: bnez a2, .LBB47_19 -; RV32IF-NEXT: .LBB47_23: -; RV32IF-NEXT: snez a5, a3 -; RV32IF-NEXT: bnez a5, .LBB47_25 -; RV32IF-NEXT: .LBB47_24: # %entry -; RV32IF-NEXT: li a4, 0 +; RV32IF-NEXT: li a5, 0 +; RV32IF-NEXT: .LBB47_23: # %entry +; RV32IF-NEXT: mv a6, a0 +; RV32IF-NEXT: beqz a1, .LBB47_30 +; RV32IF-NEXT: # %bb.24: # %entry +; RV32IF-NEXT: bnez a1, .LBB47_31 ; RV32IF-NEXT: .LBB47_25: # %entry ; RV32IF-NEXT: or a2, a3, a2 -; RV32IF-NEXT: bnez a2, .LBB47_30 -; RV32IF-NEXT: # %bb.26: # %entry +; RV32IF-NEXT: bnez a2, .LBB47_32 +; RV32IF-NEXT: .LBB47_26: # %entry ; RV32IF-NEXT: mv 
a3, a1 -; RV32IF-NEXT: beqz a5, .LBB47_31 +; RV32IF-NEXT: beqz a4, .LBB47_33 ; RV32IF-NEXT: .LBB47_27: # %entry ; RV32IF-NEXT: beqz a2, .LBB47_29 ; RV32IF-NEXT: .LBB47_28: # %entry @@ -3636,10 +3632,17 @@ ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB47_30: # %entry -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: mv a3, a1 -; RV32IF-NEXT: bnez a5, .LBB47_27 +; RV32IF-NEXT: li a6, 0 +; RV32IF-NEXT: beqz a1, .LBB47_25 ; RV32IF-NEXT: .LBB47_31: # %entry +; RV32IF-NEXT: mv a0, a6 +; RV32IF-NEXT: or a2, a3, a2 +; RV32IF-NEXT: beqz a2, .LBB47_26 +; RV32IF-NEXT: .LBB47_32: # %entry +; RV32IF-NEXT: mv a0, a5 +; RV32IF-NEXT: mv a3, a1 +; RV32IF-NEXT: bnez a4, .LBB47_27 +; RV32IF-NEXT: .LBB47_33: # %entry ; RV32IF-NEXT: li a3, 0 ; RV32IF-NEXT: bnez a2, .LBB47_28 ; RV32IF-NEXT: j .LBB47_29 @@ -3691,91 +3694,86 @@ ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a0, 16(sp) ; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: li a1, 1 -; RV32IFD-NEXT: mv a4, a0 -; RV32IFD-NEXT: bgez a2, .LBB47_5 +; RV32IFD-NEXT: lw a3, 16(sp) +; RV32IFD-NEXT: beqz a2, .LBB47_3 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: mv a3, a0 -; RV32IFD-NEXT: bgeu a0, a1, .LBB47_6 -; RV32IFD-NEXT: .LBB47_2: # %entry -; RV32IFD-NEXT: beqz a2, .LBB47_7 -; RV32IFD-NEXT: .LBB47_3: # %entry -; RV32IFD-NEXT: slti a1, a2, 0 -; RV32IFD-NEXT: mv a3, a4 -; RV32IFD-NEXT: beqz a1, .LBB47_8 -; RV32IFD-NEXT: .LBB47_4: -; RV32IFD-NEXT: lw a5, 8(sp) -; RV32IFD-NEXT: j .LBB47_9 +; RV32IFD-NEXT: slti a0, a2, 0 +; RV32IFD-NEXT: beqz a0, .LBB47_4 +; RV32IFD-NEXT: .LBB47_2: +; RV32IFD-NEXT: lw a5, 12(sp) +; RV32IFD-NEXT: j .LBB47_5 +; RV32IFD-NEXT: .LBB47_3: +; RV32IFD-NEXT: seqz a0, a3 +; RV32IFD-NEXT: bnez a0, .LBB47_2 +; RV32IFD-NEXT: .LBB47_4: # %entry +; RV32IFD-NEXT: li a5, 0 ; RV32IFD-NEXT: .LBB47_5: # %entry -; RV32IFD-NEXT: li a4, 1 -; RV32IFD-NEXT: mv a3, a0 -; RV32IFD-NEXT: bltu a0, a1, .LBB47_2 -; RV32IFD-NEXT: .LBB47_6: # %entry -; RV32IFD-NEXT: li a3, 1 -; RV32IFD-NEXT: bnez a2, .LBB47_3 -; RV32IFD-NEXT: .LBB47_7: -; RV32IFD-NEXT: seqz a1, a0 -; RV32IFD-NEXT: bnez a1, .LBB47_4 -; RV32IFD-NEXT: .LBB47_8: # %entry +; RV32IFD-NEXT: xori a1, a3, 1 +; RV32IFD-NEXT: or a4, a1, a2 +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB47_7 +; RV32IFD-NEXT: # %bb.6: # %entry +; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: .LBB47_7: # %entry +; RV32IFD-NEXT: bnez a0, .LBB47_9 +; RV32IFD-NEXT: # %bb.8: # %entry ; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: .LBB47_9: # %entry -; RV32IFD-NEXT: xori a0, a0, 1 -; RV32IFD-NEXT: or a0, a0, a2 -; RV32IFD-NEXT: li a4, 0 -; RV32IFD-NEXT: beqz a0, .LBB47_11 -; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: mv a4, a5 +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: bnez a4, .LBB47_10 +; RV32IFD-NEXT: j .LBB47_11 +; RV32IFD-NEXT: .LBB47_9: +; RV32IFD-NEXT: lw a5, 8(sp) +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: beqz a4, .LBB47_11 +; RV32IFD-NEXT: .LBB47_10: # %entry +; RV32IFD-NEXT: mv a0, a5 ; RV32IFD-NEXT: .LBB47_11: # %entry -; RV32IFD-NEXT: bnez a1, .LBB47_13 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: mv a4, a3 +; RV32IFD-NEXT: bgez a2, .LBB47_17 ; RV32IFD-NEXT: # %bb.12: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: bnez a0, .LBB47_14 -; RV32IFD-NEXT: j .LBB47_15 -; RV32IFD-NEXT: .LBB47_13: -; RV32IFD-NEXT: lw a5, 12(sp) -; RV32IFD-NEXT: li a1, 0 -; RV32IFD-NEXT: beqz a0, .LBB47_15 +; RV32IFD-NEXT: bgeu a3, a5, .LBB47_18 +; RV32IFD-NEXT: .LBB47_13: # %entry +; RV32IFD-NEXT: bnez a2, .LBB47_19 ; 
RV32IFD-NEXT: .LBB47_14: # %entry -; RV32IFD-NEXT: mv a1, a5 -; RV32IFD-NEXT: .LBB47_15: # %entry ; RV32IFD-NEXT: bgez a2, .LBB47_20 -; RV32IFD-NEXT: # %bb.16: # %entry -; RV32IFD-NEXT: mv a5, a4 -; RV32IFD-NEXT: beqz a1, .LBB47_21 +; RV32IFD-NEXT: .LBB47_15: # %entry +; RV32IFD-NEXT: beqz a2, .LBB47_21 +; RV32IFD-NEXT: .LBB47_16: # %entry +; RV32IFD-NEXT: sgtz a4, a2 +; RV32IFD-NEXT: mv a5, a0 +; RV32IFD-NEXT: beqz a4, .LBB47_22 +; RV32IFD-NEXT: j .LBB47_23 ; RV32IFD-NEXT: .LBB47_17: # %entry -; RV32IFD-NEXT: mv a0, a4 -; RV32IFD-NEXT: bnez a1, .LBB47_22 +; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: bltu a3, a5, .LBB47_13 ; RV32IFD-NEXT: .LBB47_18: # %entry -; RV32IFD-NEXT: beqz a2, .LBB47_23 +; RV32IFD-NEXT: li a3, 1 +; RV32IFD-NEXT: beqz a2, .LBB47_14 ; RV32IFD-NEXT: .LBB47_19: # %entry -; RV32IFD-NEXT: sgtz a5, a2 -; RV32IFD-NEXT: beqz a5, .LBB47_24 -; RV32IFD-NEXT: j .LBB47_25 +; RV32IFD-NEXT: mv a3, a4 +; RV32IFD-NEXT: bltz a2, .LBB47_15 ; RV32IFD-NEXT: .LBB47_20: # %entry ; RV32IFD-NEXT: li a2, 0 -; RV32IFD-NEXT: mv a5, a4 -; RV32IFD-NEXT: bnez a1, .LBB47_17 -; RV32IFD-NEXT: .LBB47_21: # %entry -; RV32IFD-NEXT: li a5, 0 -; RV32IFD-NEXT: mv a0, a4 -; RV32IFD-NEXT: beqz a1, .LBB47_18 +; RV32IFD-NEXT: bnez a2, .LBB47_16 +; RV32IFD-NEXT: .LBB47_21: +; RV32IFD-NEXT: snez a4, a3 +; RV32IFD-NEXT: mv a5, a0 +; RV32IFD-NEXT: bnez a4, .LBB47_23 ; RV32IFD-NEXT: .LBB47_22: # %entry -; RV32IFD-NEXT: mv a0, a5 -; RV32IFD-NEXT: bnez a2, .LBB47_19 -; RV32IFD-NEXT: .LBB47_23: -; RV32IFD-NEXT: snez a5, a3 -; RV32IFD-NEXT: bnez a5, .LBB47_25 -; RV32IFD-NEXT: .LBB47_24: # %entry -; RV32IFD-NEXT: li a4, 0 +; RV32IFD-NEXT: li a5, 0 +; RV32IFD-NEXT: .LBB47_23: # %entry +; RV32IFD-NEXT: mv a6, a0 +; RV32IFD-NEXT: beqz a1, .LBB47_30 +; RV32IFD-NEXT: # %bb.24: # %entry +; RV32IFD-NEXT: bnez a1, .LBB47_31 ; RV32IFD-NEXT: .LBB47_25: # %entry ; RV32IFD-NEXT: or a2, a3, a2 -; RV32IFD-NEXT: bnez a2, .LBB47_30 -; RV32IFD-NEXT: # %bb.26: # %entry +; RV32IFD-NEXT: bnez a2, .LBB47_32 +; RV32IFD-NEXT: .LBB47_26: # %entry ; RV32IFD-NEXT: mv a3, a1 -; RV32IFD-NEXT: beqz a5, .LBB47_31 +; RV32IFD-NEXT: beqz a4, .LBB47_33 ; RV32IFD-NEXT: .LBB47_27: # %entry ; RV32IFD-NEXT: beqz a2, .LBB47_29 ; RV32IFD-NEXT: .LBB47_28: # %entry @@ -3785,10 +3783,17 @@ ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; RV32IFD-NEXT: .LBB47_30: # %entry -; RV32IFD-NEXT: mv a0, a4 -; RV32IFD-NEXT: mv a3, a1 -; RV32IFD-NEXT: bnez a5, .LBB47_27 +; RV32IFD-NEXT: li a6, 0 +; RV32IFD-NEXT: beqz a1, .LBB47_25 ; RV32IFD-NEXT: .LBB47_31: # %entry +; RV32IFD-NEXT: mv a0, a6 +; RV32IFD-NEXT: or a2, a3, a2 +; RV32IFD-NEXT: beqz a2, .LBB47_26 +; RV32IFD-NEXT: .LBB47_32: # %entry +; RV32IFD-NEXT: mv a0, a5 +; RV32IFD-NEXT: mv a3, a1 +; RV32IFD-NEXT: bnez a4, .LBB47_27 +; RV32IFD-NEXT: .LBB47_33: # %entry ; RV32IFD-NEXT: li a3, 0 ; RV32IFD-NEXT: bnez a2, .LBB47_28 ; RV32IFD-NEXT: j .LBB47_29 @@ -3809,112 +3814,114 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: lw a3, 20(sp) -; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a7, 8(sp) +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a0, a4, -1 ; RV32-NEXT: li a2, -1 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltz a3, .LBB48_2 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: bltu a5, a0, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a7, -1 +; RV32-NEXT: li a1, -1 ; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a6, a4, -1 -; RV32-NEXT: mv t0, a5 -; RV32-NEXT: bgeu a1, a6, 
.LBB48_19 +; RV32-NEXT: lw a6, 20(sp) +; RV32-NEXT: mv a3, a7 +; RV32-NEXT: bne a5, a0, .LBB48_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: bne a1, a6, .LBB48_20 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: bgez a6, .LBB48_20 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: or t0, a1, a6 ; RV32-NEXT: bnez t0, .LBB48_21 ; RV32-NEXT: .LBB48_5: # %entry -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bgez a3, .LBB48_22 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgez a6, .LBB48_22 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: bgeu a1, a6, .LBB48_23 +; RV32-NEXT: bgeu a5, a0, .LBB48_23 ; RV32-NEXT: .LBB48_7: # %entry ; RV32-NEXT: bnez t0, .LBB48_24 ; RV32-NEXT: .LBB48_8: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: bnez a3, .LBB48_25 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: bnez a6, .LBB48_25 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: bgez a3, .LBB48_26 +; RV32-NEXT: bgez a6, .LBB48_26 ; RV32-NEXT: .LBB48_10: # %entry -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bgeu a4, a1, .LBB48_27 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bltz a6, .LBB48_27 ; RV32-NEXT: .LBB48_11: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a4, .LBB48_28 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: bgeu a4, a5, .LBB48_28 ; RV32-NEXT: .LBB48_12: # %entry -; RV32-NEXT: bltz a3, .LBB48_29 +; RV32-NEXT: and a7, a0, a6 +; RV32-NEXT: bne a7, a2, .LBB48_29 ; RV32-NEXT: .LBB48_13: # %entry -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: bne a6, a2, .LBB48_30 +; RV32-NEXT: mv t0, a3 +; RV32-NEXT: bgeu a4, a5, .LBB48_30 ; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bltz a3, .LBB48_31 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a5, a4, .LBB48_31 ; RV32-NEXT: .LBB48_15: # %entry -; RV32-NEXT: bgeu a4, a1, .LBB48_32 +; RV32-NEXT: bltz a6, .LBB48_32 ; RV32-NEXT: .LBB48_16: # %entry -; RV32-NEXT: beq a6, a2, .LBB48_18 +; RV32-NEXT: beq a7, a2, .LBB48_18 ; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a0, a3 ; RV32-NEXT: .LBB48_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: li t0, -1 -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: beq a1, a6, .LBB48_4 +; RV32-NEXT: mv a3, a1 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: bltz a6, .LBB48_4 ; RV32-NEXT: .LBB48_20: # %entry -; RV32-NEXT: mv a5, t0 -; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: li a7, -1 +; RV32-NEXT: or t0, a1, a6 ; RV32-NEXT: beqz t0, .LBB48_5 ; RV32-NEXT: .LBB48_21: # %entry -; RV32-NEXT: mv a5, a7 -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bltz a3, .LBB48_6 +; RV32-NEXT: mv a3, a7 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltz a6, .LBB48_6 ; RV32-NEXT: .LBB48_22: # %entry -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: bltu a1, a6, .LBB48_7 +; RV32-NEXT: mv a7, a0 +; RV32-NEXT: bltu a5, a0, .LBB48_7 ; RV32-NEXT: .LBB48_23: # %entry -; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a5, a0 ; RV32-NEXT: beqz t0, .LBB48_8 ; RV32-NEXT: .LBB48_24: # %entry -; RV32-NEXT: mv a1, a7 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a3, .LBB48_9 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: beqz a6, .LBB48_9 ; RV32-NEXT: .LBB48_25: # %entry -; RV32-NEXT: srai a6, a3, 31 -; RV32-NEXT: and a6, a6, a0 -; RV32-NEXT: bltz a3, .LBB48_10 +; RV32-NEXT: srai a0, a6, 31 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: bltz a6, .LBB48_10 ; RV32-NEXT: .LBB48_26: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltu a4, a1, .LBB48_11 +; RV32-NEXT: li a6, 0 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgez a6, .LBB48_11 ; RV32-NEXT: .LBB48_27: 
# %entry -; RV32-NEXT: li a7, 0 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a4, .LBB48_12 +; RV32-NEXT: lui t0, 524288 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: bltu a4, a5, .LBB48_12 ; RV32-NEXT: .LBB48_28: # %entry -; RV32-NEXT: mv a0, a7 -; RV32-NEXT: bgez a3, .LBB48_13 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: and a7, a0, a6 +; RV32-NEXT: beq a7, a2, .LBB48_13 ; RV32-NEXT: .LBB48_29: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: beq a6, a2, .LBB48_14 +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: mv t0, a3 +; RV32-NEXT: bltu a4, a5, .LBB48_14 ; RV32-NEXT: .LBB48_30: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bgez a3, .LBB48_15 +; RV32-NEXT: li t0, 0 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a5, a4, .LBB48_15 ; RV32-NEXT: .LBB48_31: # %entry -; RV32-NEXT: lui a5, 524288 -; RV32-NEXT: bltu a4, a1, .LBB48_16 +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: bgez a6, .LBB48_16 ; RV32-NEXT: .LBB48_32: # %entry -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a6, a2, .LBB48_17 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: bne a7, a2, .LBB48_17 ; RV32-NEXT: j .LBB48_18 ; ; RV64-LABEL: stest_f32i64_mm: @@ -3948,31 +3955,31 @@ ; RV32-NEXT: beqz a0, .LBB49_3 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: beq a2, a1, .LBB49_4 +; RV32-NEXT: beqz a2, .LBB49_4 ; RV32-NEXT: .LBB49_2: ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: j .LBB49_5 ; RV32-NEXT: .LBB49_3: ; RV32-NEXT: seqz a2, a3 -; RV32-NEXT: bne a2, a1, .LBB49_2 +; RV32-NEXT: bnez a2, .LBB49_2 ; RV32-NEXT: .LBB49_4: # %entry ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: .LBB49_5: # %entry ; RV32-NEXT: xori a3, a3, 1 ; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq a3, a1, .LBB49_7 +; RV32-NEXT: beqz a3, .LBB49_7 ; RV32-NEXT: # %bb.6: # %entry ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: .LBB49_7: # %entry -; RV32-NEXT: bne a2, a1, .LBB49_9 +; RV32-NEXT: bnez a2, .LBB49_9 ; RV32-NEXT: # %bb.8: # %entry ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bne a3, a1, .LBB49_10 +; RV32-NEXT: bnez a3, .LBB49_10 ; RV32-NEXT: j .LBB49_11 ; RV32-NEXT: .LBB49_9: ; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: beq a3, a1, .LBB49_11 +; RV32-NEXT: beqz a3, .LBB49_11 ; RV32-NEXT: .LBB49_10: # %entry ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB49_11: # %entry @@ -4017,91 +4024,86 @@ ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a0, 16(sp) ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: bgez a2, .LBB50_5 +; RV32-NEXT: lw a3, 16(sp) +; RV32-NEXT: beqz a2, .LBB50_3 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgeu a0, a1, .LBB50_6 -; RV32-NEXT: .LBB50_2: # %entry -; RV32-NEXT: beqz a2, .LBB50_7 -; RV32-NEXT: .LBB50_3: # %entry -; RV32-NEXT: slti a1, a2, 0 -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: beqz a1, .LBB50_8 -; RV32-NEXT: .LBB50_4: -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: j .LBB50_9 +; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: beqz a0, .LBB50_4 +; RV32-NEXT: .LBB50_2: +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: j .LBB50_5 +; RV32-NEXT: .LBB50_3: +; RV32-NEXT: seqz a0, a3 +; RV32-NEXT: bnez a0, .LBB50_2 +; RV32-NEXT: .LBB50_4: # %entry +; RV32-NEXT: li a5, 0 ; RV32-NEXT: .LBB50_5: # %entry -; RV32-NEXT: li a4, 1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltu a0, a1, .LBB50_2 -; RV32-NEXT: .LBB50_6: # %entry -; RV32-NEXT: li a3, 1 -; RV32-NEXT: bnez a2, .LBB50_3 -; RV32-NEXT: .LBB50_7: -; RV32-NEXT: seqz a1, a0 -; RV32-NEXT: bnez a1, .LBB50_4 -; RV32-NEXT: .LBB50_8: # %entry +; RV32-NEXT: xori a1, a3, 1 +; RV32-NEXT: or a4, a1, 
a2 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: beqz a4, .LBB50_7 +; RV32-NEXT: # %bb.6: # %entry +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB50_7: # %entry +; RV32-NEXT: bnez a0, .LBB50_9 +; RV32-NEXT: # %bb.8: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB50_9: # %entry -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: or a0, a0, a2 -; RV32-NEXT: li a4, 0 -; RV32-NEXT: beqz a0, .LBB50_11 -; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: bnez a4, .LBB50_10 +; RV32-NEXT: j .LBB50_11 +; RV32-NEXT: .LBB50_9: +; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: li a0, 0 +; RV32-NEXT: beqz a4, .LBB50_11 +; RV32-NEXT: .LBB50_10: # %entry +; RV32-NEXT: mv a0, a5 ; RV32-NEXT: .LBB50_11: # %entry -; RV32-NEXT: bnez a1, .LBB50_13 +; RV32-NEXT: li a5, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: bgez a2, .LBB50_17 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bnez a0, .LBB50_14 -; RV32-NEXT: j .LBB50_15 -; RV32-NEXT: .LBB50_13: -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB50_15 +; RV32-NEXT: bgeu a3, a5, .LBB50_18 +; RV32-NEXT: .LBB50_13: # %entry +; RV32-NEXT: bnez a2, .LBB50_19 ; RV32-NEXT: .LBB50_14: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: .LBB50_15: # %entry ; RV32-NEXT: bgez a2, .LBB50_20 -; RV32-NEXT: # %bb.16: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: beqz a1, .LBB50_21 +; RV32-NEXT: .LBB50_15: # %entry +; RV32-NEXT: beqz a2, .LBB50_21 +; RV32-NEXT: .LBB50_16: # %entry +; RV32-NEXT: sgtz a4, a2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: beqz a4, .LBB50_22 +; RV32-NEXT: j .LBB50_23 ; RV32-NEXT: .LBB50_17: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bnez a1, .LBB50_22 +; RV32-NEXT: li a4, 1 +; RV32-NEXT: bltu a3, a5, .LBB50_13 ; RV32-NEXT: .LBB50_18: # %entry -; RV32-NEXT: beqz a2, .LBB50_23 +; RV32-NEXT: li a3, 1 +; RV32-NEXT: beqz a2, .LBB50_14 ; RV32-NEXT: .LBB50_19: # %entry -; RV32-NEXT: sgtz a5, a2 -; RV32-NEXT: beqz a5, .LBB50_24 -; RV32-NEXT: j .LBB50_25 +; RV32-NEXT: mv a3, a4 +; RV32-NEXT: bltz a2, .LBB50_15 ; RV32-NEXT: .LBB50_20: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: bnez a1, .LBB50_17 -; RV32-NEXT: .LBB50_21: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB50_18 +; RV32-NEXT: bnez a2, .LBB50_16 +; RV32-NEXT: .LBB50_21: +; RV32-NEXT: snez a4, a3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: bnez a4, .LBB50_23 ; RV32-NEXT: .LBB50_22: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bnez a2, .LBB50_19 -; RV32-NEXT: .LBB50_23: -; RV32-NEXT: snez a5, a3 -; RV32-NEXT: bnez a5, .LBB50_25 -; RV32-NEXT: .LBB50_24: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a5, 0 +; RV32-NEXT: .LBB50_23: # %entry +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: beqz a1, .LBB50_30 +; RV32-NEXT: # %bb.24: # %entry +; RV32-NEXT: bnez a1, .LBB50_31 ; RV32-NEXT: .LBB50_25: # %entry ; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB50_30 -; RV32-NEXT: # %bb.26: # %entry +; RV32-NEXT: bnez a2, .LBB50_32 +; RV32-NEXT: .LBB50_26: # %entry ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: beqz a5, .LBB50_31 +; RV32-NEXT: beqz a4, .LBB50_33 ; RV32-NEXT: .LBB50_27: # %entry ; RV32-NEXT: beqz a2, .LBB50_29 ; RV32-NEXT: .LBB50_28: # %entry @@ -4111,10 +4113,17 @@ ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB50_30: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bnez a5, .LBB50_27 +; RV32-NEXT: li a6, 0 +; RV32-NEXT: beqz a1, .LBB50_25 ; RV32-NEXT: .LBB50_31: # %entry +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: or a2, a3, a2 +; RV32-NEXT: beqz a2, 
.LBB50_26 +; RV32-NEXT: .LBB50_32: # %entry +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: mv a3, a1 +; RV32-NEXT: bnez a4, .LBB50_27 +; RV32-NEXT: .LBB50_33: # %entry ; RV32-NEXT: li a3, 0 ; RV32-NEXT: bnez a2, .LBB50_28 ; RV32-NEXT: j .LBB50_29 @@ -4176,112 +4185,114 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: lw a3, 20(sp) -; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a7, 8(sp) +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a0, a4, -1 ; RV32-NEXT: li a2, -1 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltz a3, .LBB51_2 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: bltu a5, a0, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a7, -1 +; RV32-NEXT: li a1, -1 ; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a6, a4, -1 -; RV32-NEXT: mv t0, a5 -; RV32-NEXT: bgeu a1, a6, .LBB51_19 +; RV32-NEXT: lw a6, 20(sp) +; RV32-NEXT: mv a3, a7 +; RV32-NEXT: bne a5, a0, .LBB51_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: bne a1, a6, .LBB51_20 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: bgez a6, .LBB51_20 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: or t0, a1, a6 ; RV32-NEXT: bnez t0, .LBB51_21 ; RV32-NEXT: .LBB51_5: # %entry -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bgez a3, .LBB51_22 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgez a6, .LBB51_22 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: bgeu a1, a6, .LBB51_23 +; RV32-NEXT: bgeu a5, a0, .LBB51_23 ; RV32-NEXT: .LBB51_7: # %entry ; RV32-NEXT: bnez t0, .LBB51_24 ; RV32-NEXT: .LBB51_8: # %entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: bnez a3, .LBB51_25 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: bnez a6, .LBB51_25 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: bgez a3, .LBB51_26 +; RV32-NEXT: bgez a6, .LBB51_26 ; RV32-NEXT: .LBB51_10: # %entry -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bgeu a4, a1, .LBB51_27 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bltz a6, .LBB51_27 ; RV32-NEXT: .LBB51_11: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a4, .LBB51_28 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: bgeu a4, a5, .LBB51_28 ; RV32-NEXT: .LBB51_12: # %entry -; RV32-NEXT: bltz a3, .LBB51_29 +; RV32-NEXT: and a7, a0, a6 +; RV32-NEXT: bne a7, a2, .LBB51_29 ; RV32-NEXT: .LBB51_13: # %entry -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: bne a6, a2, .LBB51_30 +; RV32-NEXT: mv t0, a3 +; RV32-NEXT: bgeu a4, a5, .LBB51_30 ; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bltz a3, .LBB51_31 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a5, a4, .LBB51_31 ; RV32-NEXT: .LBB51_15: # %entry -; RV32-NEXT: bgeu a4, a1, .LBB51_32 +; RV32-NEXT: bltz a6, .LBB51_32 ; RV32-NEXT: .LBB51_16: # %entry -; RV32-NEXT: beq a6, a2, .LBB51_18 +; RV32-NEXT: beq a7, a2, .LBB51_18 ; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a0, a3 ; RV32-NEXT: .LBB51_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: li t0, -1 -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: beq a1, a6, .LBB51_4 +; RV32-NEXT: mv a3, a1 +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: bltz a6, .LBB51_4 ; RV32-NEXT: .LBB51_20: # %entry -; RV32-NEXT: mv a5, t0 -; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: li a7, -1 +; RV32-NEXT: or t0, a1, a6 ; RV32-NEXT: beqz t0, .LBB51_5 ; RV32-NEXT: .LBB51_21: # %entry -; RV32-NEXT: mv a5, a7 -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bltz a3, .LBB51_6 +; RV32-NEXT: mv a3, a7 +; RV32-NEXT: mv a7, a5 +; 
RV32-NEXT: bltz a6, .LBB51_6 ; RV32-NEXT: .LBB51_22: # %entry -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: bltu a1, a6, .LBB51_7 +; RV32-NEXT: mv a7, a0 +; RV32-NEXT: bltu a5, a0, .LBB51_7 ; RV32-NEXT: .LBB51_23: # %entry -; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a5, a0 ; RV32-NEXT: beqz t0, .LBB51_8 ; RV32-NEXT: .LBB51_24: # %entry -; RV32-NEXT: mv a1, a7 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a3, .LBB51_9 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: beqz a6, .LBB51_9 ; RV32-NEXT: .LBB51_25: # %entry -; RV32-NEXT: srai a6, a3, 31 -; RV32-NEXT: and a6, a6, a0 -; RV32-NEXT: bltz a3, .LBB51_10 +; RV32-NEXT: srai a0, a6, 31 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: bltz a6, .LBB51_10 ; RV32-NEXT: .LBB51_26: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltu a4, a1, .LBB51_11 +; RV32-NEXT: li a6, 0 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgez a6, .LBB51_11 ; RV32-NEXT: .LBB51_27: # %entry -; RV32-NEXT: li a7, 0 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a4, .LBB51_12 +; RV32-NEXT: lui t0, 524288 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: bltu a4, a5, .LBB51_12 ; RV32-NEXT: .LBB51_28: # %entry -; RV32-NEXT: mv a0, a7 -; RV32-NEXT: bgez a3, .LBB51_13 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: and a7, a0, a6 +; RV32-NEXT: beq a7, a2, .LBB51_13 ; RV32-NEXT: .LBB51_29: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: beq a6, a2, .LBB51_14 +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: mv t0, a3 +; RV32-NEXT: bltu a4, a5, .LBB51_14 ; RV32-NEXT: .LBB51_30: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bgez a3, .LBB51_15 +; RV32-NEXT: li t0, 0 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a5, a4, .LBB51_15 ; RV32-NEXT: .LBB51_31: # %entry -; RV32-NEXT: lui a5, 524288 -; RV32-NEXT: bltu a4, a1, .LBB51_16 +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: bgez a6, .LBB51_16 ; RV32-NEXT: .LBB51_32: # %entry -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a6, a2, .LBB51_17 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: bne a7, a2, .LBB51_17 ; RV32-NEXT: j .LBB51_18 ; ; RV64-LABEL: stest_f16i64_mm: @@ -4361,31 +4372,31 @@ ; RV32-NEXT: beqz a0, .LBB52_3 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: beq a2, a1, .LBB52_4 +; RV32-NEXT: beqz a2, .LBB52_4 ; RV32-NEXT: .LBB52_2: ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: j .LBB52_5 ; RV32-NEXT: .LBB52_3: ; RV32-NEXT: seqz a2, a3 -; RV32-NEXT: bne a2, a1, .LBB52_2 +; RV32-NEXT: bnez a2, .LBB52_2 ; RV32-NEXT: .LBB52_4: # %entry ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: .LBB52_5: # %entry ; RV32-NEXT: xori a3, a3, 1 ; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq a3, a1, .LBB52_7 +; RV32-NEXT: beqz a3, .LBB52_7 ; RV32-NEXT: # %bb.6: # %entry ; RV32-NEXT: mv a0, a4 ; RV32-NEXT: .LBB52_7: # %entry -; RV32-NEXT: bne a2, a1, .LBB52_9 +; RV32-NEXT: bnez a2, .LBB52_9 ; RV32-NEXT: # %bb.8: # %entry ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bne a3, a1, .LBB52_10 +; RV32-NEXT: bnez a3, .LBB52_10 ; RV32-NEXT: j .LBB52_11 ; RV32-NEXT: .LBB52_9: ; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: beq a3, a1, .LBB52_11 +; RV32-NEXT: beqz a3, .LBB52_11 ; RV32-NEXT: .LBB52_10: # %entry ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB52_11: # %entry @@ -4434,91 +4445,86 @@ ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a0, 16(sp) ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: bgez a2, .LBB53_5 +; RV32-NEXT: lw a3, 16(sp) +; RV32-NEXT: beqz a2, .LBB53_3 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: 
bgeu a0, a1, .LBB53_6 -; RV32-NEXT: .LBB53_2: # %entry -; RV32-NEXT: beqz a2, .LBB53_7 -; RV32-NEXT: .LBB53_3: # %entry -; RV32-NEXT: slti a1, a2, 0 -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: beqz a1, .LBB53_8 -; RV32-NEXT: .LBB53_4: -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: j .LBB53_9 +; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: beqz a0, .LBB53_4 +; RV32-NEXT: .LBB53_2: +; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: j .LBB53_5 +; RV32-NEXT: .LBB53_3: +; RV32-NEXT: seqz a0, a3 +; RV32-NEXT: bnez a0, .LBB53_2 +; RV32-NEXT: .LBB53_4: # %entry +; RV32-NEXT: li a5, 0 ; RV32-NEXT: .LBB53_5: # %entry -; RV32-NEXT: li a4, 1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltu a0, a1, .LBB53_2 -; RV32-NEXT: .LBB53_6: # %entry -; RV32-NEXT: li a3, 1 -; RV32-NEXT: bnez a2, .LBB53_3 -; RV32-NEXT: .LBB53_7: -; RV32-NEXT: seqz a1, a0 -; RV32-NEXT: bnez a1, .LBB53_4 -; RV32-NEXT: .LBB53_8: # %entry +; RV32-NEXT: xori a1, a3, 1 +; RV32-NEXT: or a4, a1, a2 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: beqz a4, .LBB53_7 +; RV32-NEXT: # %bb.6: # %entry +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB53_7: # %entry +; RV32-NEXT: bnez a0, .LBB53_9 +; RV32-NEXT: # %bb.8: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB53_9: # %entry -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: or a0, a0, a2 -; RV32-NEXT: li a4, 0 -; RV32-NEXT: beqz a0, .LBB53_11 -; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: bnez a4, .LBB53_10 +; RV32-NEXT: j .LBB53_11 +; RV32-NEXT: .LBB53_9: +; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: li a0, 0 +; RV32-NEXT: beqz a4, .LBB53_11 +; RV32-NEXT: .LBB53_10: # %entry +; RV32-NEXT: mv a0, a5 ; RV32-NEXT: .LBB53_11: # %entry -; RV32-NEXT: bnez a1, .LBB53_13 +; RV32-NEXT: li a5, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: bgez a2, .LBB53_17 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bnez a0, .LBB53_14 -; RV32-NEXT: j .LBB53_15 -; RV32-NEXT: .LBB53_13: -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB53_15 +; RV32-NEXT: bgeu a3, a5, .LBB53_18 +; RV32-NEXT: .LBB53_13: # %entry +; RV32-NEXT: bnez a2, .LBB53_19 ; RV32-NEXT: .LBB53_14: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: .LBB53_15: # %entry ; RV32-NEXT: bgez a2, .LBB53_20 -; RV32-NEXT: # %bb.16: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: beqz a1, .LBB53_21 +; RV32-NEXT: .LBB53_15: # %entry +; RV32-NEXT: beqz a2, .LBB53_21 +; RV32-NEXT: .LBB53_16: # %entry +; RV32-NEXT: sgtz a4, a2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: beqz a4, .LBB53_22 +; RV32-NEXT: j .LBB53_23 ; RV32-NEXT: .LBB53_17: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bnez a1, .LBB53_22 +; RV32-NEXT: li a4, 1 +; RV32-NEXT: bltu a3, a5, .LBB53_13 ; RV32-NEXT: .LBB53_18: # %entry -; RV32-NEXT: beqz a2, .LBB53_23 +; RV32-NEXT: li a3, 1 +; RV32-NEXT: beqz a2, .LBB53_14 ; RV32-NEXT: .LBB53_19: # %entry -; RV32-NEXT: sgtz a5, a2 -; RV32-NEXT: beqz a5, .LBB53_24 -; RV32-NEXT: j .LBB53_25 +; RV32-NEXT: mv a3, a4 +; RV32-NEXT: bltz a2, .LBB53_15 ; RV32-NEXT: .LBB53_20: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: bnez a1, .LBB53_17 -; RV32-NEXT: .LBB53_21: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB53_18 +; RV32-NEXT: bnez a2, .LBB53_16 +; RV32-NEXT: .LBB53_21: +; RV32-NEXT: snez a4, a3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: bnez a4, .LBB53_23 ; RV32-NEXT: .LBB53_22: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bnez a2, .LBB53_19 -; RV32-NEXT: .LBB53_23: -; RV32-NEXT: snez a5, a3 -; RV32-NEXT: bnez a5, .LBB53_25 -; RV32-NEXT: .LBB53_24: # %entry 
-; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a5, 0 +; RV32-NEXT: .LBB53_23: # %entry +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: beqz a1, .LBB53_30 +; RV32-NEXT: # %bb.24: # %entry +; RV32-NEXT: bnez a1, .LBB53_31 ; RV32-NEXT: .LBB53_25: # %entry ; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB53_30 -; RV32-NEXT: # %bb.26: # %entry +; RV32-NEXT: bnez a2, .LBB53_32 +; RV32-NEXT: .LBB53_26: # %entry ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: beqz a5, .LBB53_31 +; RV32-NEXT: beqz a4, .LBB53_33 ; RV32-NEXT: .LBB53_27: # %entry ; RV32-NEXT: beqz a2, .LBB53_29 ; RV32-NEXT: .LBB53_28: # %entry @@ -4528,10 +4534,17 @@ ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB53_30: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bnez a5, .LBB53_27 +; RV32-NEXT: li a6, 0 +; RV32-NEXT: beqz a1, .LBB53_25 ; RV32-NEXT: .LBB53_31: # %entry +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: or a2, a3, a2 +; RV32-NEXT: beqz a2, .LBB53_26 +; RV32-NEXT: .LBB53_32: # %entry +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: mv a3, a1 +; RV32-NEXT: bnez a4, .LBB53_27 +; RV32-NEXT: .LBB53_33: # %entry ; RV32-NEXT: li a3, 0 ; RV32-NEXT: bnez a2, .LBB53_28 ; RV32-NEXT: j .LBB53_29 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -3625,7 +3625,7 @@ ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: bne a3, a4, .LBB46_7 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: bne s1, a1, .LBB46_8 +; CHECK-NEXT: bnez s1, .LBB46_8 ; CHECK-NEXT: .LBB46_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB46_6 ; CHECK-NEXT: .LBB46_5: # %entry @@ -3639,7 +3639,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB46_7: # %entry ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: beq s1, a1, .LBB46_4 +; CHECK-NEXT: beqz s1, .LBB46_4 ; CHECK-NEXT: .LBB46_8: # %entry ; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB46_5 @@ -3893,7 +3893,7 @@ ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: bne a3, a4, .LBB49_7 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: bne s1, a1, .LBB49_8 +; CHECK-NEXT: bnez s1, .LBB49_8 ; CHECK-NEXT: .LBB49_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB49_6 ; CHECK-NEXT: .LBB49_5: # %entry @@ -3907,7 +3907,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB49_7: # %entry ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: beq s1, a1, .LBB49_4 +; CHECK-NEXT: beqz s1, .LBB49_4 ; CHECK-NEXT: .LBB49_8: # %entry ; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB49_5 @@ -4165,7 +4165,7 @@ ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: bne a3, a4, .LBB52_7 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: bne s1, a1, .LBB52_8 +; CHECK-NEXT: bnez s1, .LBB52_8 ; CHECK-NEXT: .LBB52_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB52_6 ; CHECK-NEXT: .LBB52_5: # %entry @@ -4179,7 +4179,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB52_7: # %entry ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: beq s1, a1, .LBB52_4 +; CHECK-NEXT: beqz s1, .LBB52_4 ; CHECK-NEXT: .LBB52_8: # %entry ; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB52_5 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -127,98 +127,94 @@ ; ; RV32I-LABEL: fcvt_si_h_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: lui s3, 1048568 -; RV32I-NEXT: bltz s2, .LBB1_2 +; RV32I-NEXT: lui s2, 1048568 +; RV32I-NEXT: bltz s1, .LBB1_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB1_2: # %start ; RV32I-NEXT: lui a0, 290816 ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB1_4 +; RV32I-NEXT: blez a0, .LBB1_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: .LBB1_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB1_6 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: bnez a1, .LBB1_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: .LBB1_6: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_si_h_sat: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB1_2 +; RV64I-NEXT: lui s2, 1048568 +; RV64I-NEXT: bltz s1, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB1_2: # %start ; RV64I-NEXT: lui a0, 290816 ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB1_4 +; RV64I-NEXT: blez a0, .LBB1_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 -; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: addiw s2, a0, -1 ; RV64I-NEXT: .LBB1_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, 
.LBB1_6 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB1_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: .LBB1_6: # %start -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) @@ -483,45 +479,43 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 847872 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: lui s3, 524288 -; RV32I-NEXT: bltz s2, .LBB5_2 +; RV32I-NEXT: lui s2, 524288 +; RV32I-NEXT: bltz s1, .LBB5_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB5_2: # %start ; RV32I-NEXT: lui a0, 323584 ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB5_4 +; RV32I-NEXT: blez a0, .LBB5_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: addi s3, s4, -1 +; RV32I-NEXT: addi s2, s3, -1 ; RV32I-NEXT: .LBB5_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB5_6 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: bnez a1, .LBB5_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: .LBB5_6: # %start -; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -533,45 +527,43 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 847872 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: lui s3, 524288 -; RV64I-NEXT: bltz s2, .LBB5_2 +; RV64I-NEXT: lui s2, 524288 +; RV64I-NEXT: bltz s1, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB5_2: # %start ; RV64I-NEXT: lui a0, 323584 ; RV64I-NEXT: addiw a1, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: 
bge s1, a0, .LBB5_4 +; RV64I-NEXT: blez a0, .LBB5_4 ; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: addiw s3, s4, -1 +; RV64I-NEXT: addiw s2, s3, -1 ; RV64I-NEXT: .LBB5_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB5_6 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB5_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: .LBB5_6: # %start -; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ -1034,7 +1026,7 @@ ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: bne a0, s1, .LBB10_6 +; RV32I-NEXT: bnez a0, .LBB10_6 ; RV32I-NEXT: # %bb.5: # %start ; RV32I-NEXT: mv s3, s6 ; RV32I-NEXT: .LBB10_6: # %start @@ -1043,7 +1035,7 @@ ; RV32I-NEXT: call __gesf2@plt ; RV32I-NEXT: lui s6, 524288 ; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: blt a0, s1, .LBB10_8 +; RV32I-NEXT: bltz a0, .LBB10_8 ; RV32I-NEXT: # %bb.7: # %start ; RV32I-NEXT: mv s5, s2 ; RV32I-NEXT: .LBB10_8: # %start @@ -1057,7 +1049,7 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB10_12 +; RV32I-NEXT: bnez a0, .LBB10_12 ; RV32I-NEXT: # %bb.11: # %start ; RV32I-NEXT: mv s1, s5 ; RV32I-NEXT: .LBB10_12: # %start @@ -1082,47 +1074,45 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 913408 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 -; RV64I-NEXT: li s4, -1 -; RV64I-NEXT: bltz s3, .LBB10_2 +; RV64I-NEXT: li s3, -1 +; RV64I-NEXT: bltz s2, .LBB10_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: j .LBB10_3 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: slli s2, s4, 63 +; RV64I-NEXT: slli s1, s3, 63 ; RV64I-NEXT: .LBB10_3: # %start ; RV64I-NEXT: lui a0, 389120 ; RV64I-NEXT: addiw a1, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB10_5 +; RV64I-NEXT: blez a0, .LBB10_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srli s2, s4, 1 +; RV64I-NEXT: srli s1, s3, 1 ; RV64I-NEXT: .LBB10_5: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB10_7 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: bnez a1, .LBB10_7 ; RV64I-NEXT: # %bb.6: # %start -; RV64I-NEXT: mv s1, s2 -; RV64I-NEXT: .LBB10_7: # %start ; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: .LBB10_7: # %start ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret start: @@ 
-2371,12 +2361,11 @@ ; ; RV32I-LABEL: fcvt_w_s_sat_i16: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt @@ -2386,46 +2375,44 @@ ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: lui s3, 1048568 +; RV32I-NEXT: lui s2, 1048568 ; RV32I-NEXT: bltz s1, .LBB32_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB32_2: # %start ; RV32I-NEXT: lui a0, 290816 ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s2, a0, .LBB32_4 +; RV32I-NEXT: blez a0, .LBB32_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: .LBB32_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s2, .LBB32_6 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: bnez a0, .LBB32_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: .LBB32_6: # %start -; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_w_s_sat_i16: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt @@ -2435,36 +2422,35 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: lui s3, 1048568 +; RV64I-NEXT: lui s2, 1048568 ; RV64I-NEXT: bltz s1, .LBB32_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB32_2: # %start ; RV64I-NEXT: lui a0, 290816 ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s2, a0, .LBB32_4 +; RV64I-NEXT: blez a0, .LBB32_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 -; 
RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: addiw s2, a0, -1 ; RV64I-NEXT: .LBB32_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s2, .LBB32_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB32_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: .LBB32_6: # %start -; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) @@ -2765,12 +2751,11 @@ ; ; RV32I-LABEL: fcvt_w_s_sat_i8: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt @@ -2780,45 +2765,43 @@ ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: li s3, -128 +; RV32I-NEXT: li s2, -128 ; RV32I-NEXT: bltz s1, .LBB36_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: .LBB36_2: # %start ; RV32I-NEXT: lui a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: li s1, 127 -; RV32I-NEXT: blt s2, a0, .LBB36_4 +; RV32I-NEXT: bgtz a0, .LBB36_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s1, s2 ; RV32I-NEXT: .LBB36_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s2, .LBB36_6 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: bnez a0, .LBB36_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s2, s1 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: .LBB36_6: # %start -; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_w_s_sat_i8: ; RV64I: # %bb.0: # %start -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 
24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt @@ -2828,35 +2811,34 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: li s3, -128 +; RV64I-NEXT: li s2, -128 ; RV64I-NEXT: bltz s1, .LBB36_2 ; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: .LBB36_2: # %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: li s1, 127 -; RV64I-NEXT: blt s2, a0, .LBB36_4 +; RV64I-NEXT: bgtz a0, .LBB36_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s1, s2 ; RV64I-NEXT: .LBB36_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s2, .LBB36_6 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: bnez a0, .LBB36_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s2, s1 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: .LBB36_6: # %start -; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: %0 = tail call i8 @llvm.fptosi.sat.i8.f16(half %a) diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll @@ -141,24 +141,24 @@ ; CHECK-LABEL: rol_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a3, a2, 26 -; CHECK-NEXT: srli a3, a3, 31 +; CHECK-NEXT: srli a5, a3, 31 ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: bnez a3, .LBB7_2 +; CHECK-NEXT: bnez a5, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: sll a5, a4, a2 -; CHECK-NEXT: bnez a3, .LBB7_4 +; CHECK-NEXT: sll a3, a4, a2 +; CHECK-NEXT: bnez a5, .LBB7_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: not a6, a2 -; CHECK-NEXT: srl a1, a1, a6 -; CHECK-NEXT: or a3, a5, a1 +; CHECK-NEXT: not a5, a2 +; CHECK-NEXT: srl a1, a1, a5 +; CHECK-NEXT: or a3, a3, a1 ; CHECK-NEXT: sll a0, a0, a2 ; CHECK-NEXT: srli a1, a4, 1 -; CHECK-NEXT: srl a1, a1, a6 +; CHECK-NEXT: srl a1, a1, a5 ; CHECK-NEXT: or a1, a0, a1 ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: ret @@ -193,24 +193,24 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: ror_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a4, a2, 32 +; CHECK-NEXT: andi a5, a2, 32 ; CHECK-NEXT: mv a3, a0 -; CHECK-NEXT: beqz a4, .LBB9_2 +; CHECK-NEXT: beqz a5, .LBB9_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: srl a5, a3, a2 -; CHECK-NEXT: beqz a4, 
.LBB9_4 +; CHECK-NEXT: srl a4, a3, a2 +; CHECK-NEXT: beqz a5, .LBB9_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: .LBB9_4: ; CHECK-NEXT: slli a0, a1, 1 -; CHECK-NEXT: not a4, a2 -; CHECK-NEXT: sll a0, a0, a4 -; CHECK-NEXT: or a0, a0, a5 +; CHECK-NEXT: not a5, a2 +; CHECK-NEXT: sll a0, a0, a5 +; CHECK-NEXT: or a0, a0, a4 ; CHECK-NEXT: srl a1, a1, a2 ; CHECK-NEXT: slli a2, a3, 1 -; CHECK-NEXT: sll a2, a2, a4 +; CHECK-NEXT: sll a2, a2, a5 ; CHECK-NEXT: or a1, a2, a1 ; CHECK-NEXT: ret %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -4470,22 +4470,22 @@ ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a5, v0 -; RV64ZVE32F-NEXT: andi a3, a5, 1 +; RV64ZVE32F-NEXT: vmv.x.s a6, v0 +; RV64ZVE32F-NEXT: andi a3, a6, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB48_3 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB48_4 ; RV64ZVE32F-NEXT: .LBB48_2: ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: j .LBB48_5 ; RV64ZVE32F-NEXT: .LBB48_3: ; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: beqz a4, .LBB48_2 ; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu @@ -4496,20 +4496,20 @@ ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: .LBB48_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: andi a5, a6, 4 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: beqz a6, .LBB48_7 +; RV64ZVE32F-NEXT: beqz a5, .LBB48_7 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a6, v9 -; RV64ZVE32F-NEXT: slli a6, a6, 3 -; RV64ZVE32F-NEXT: add a6, a1, a6 -; RV64ZVE32F-NEXT: ld a6, 0(a6) +; RV64ZVE32F-NEXT: vmv.x.s a5, v9 +; RV64ZVE32F-NEXT: slli a5, a5, 3 +; RV64ZVE32F-NEXT: add a5, a1, a5 +; RV64ZVE32F-NEXT: ld a5, 0(a5) ; RV64ZVE32F-NEXT: j .LBB48_8 ; RV64ZVE32F-NEXT: .LBB48_7: -; RV64ZVE32F-NEXT: ld a6, 16(a2) +; RV64ZVE32F-NEXT: ld a5, 16(a2) ; RV64ZVE32F-NEXT: .LBB48_8: # %else5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu -; RV64ZVE32F-NEXT: andi a7, a5, 8 +; RV64ZVE32F-NEXT: andi a7, a6, 8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB48_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 @@ -4519,18 +4519,18 @@ ; RV64ZVE32F-NEXT: slli a7, a7, 3 ; RV64ZVE32F-NEXT: add a7, a1, a7 ; RV64ZVE32F-NEXT: ld a7, 0(a7) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: bnez t0, .LBB48_13 ; RV64ZVE32F-NEXT: .LBB48_10: ; RV64ZVE32F-NEXT: ld t0, 32(a2) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: bnez t1, .LBB48_14 ; RV64ZVE32F-NEXT: .LBB48_11: ; RV64ZVE32F-NEXT: ld t1, 40(a2) ; RV64ZVE32F-NEXT: j .LBB48_15 ; RV64ZVE32F-NEXT: .LBB48_12: ; RV64ZVE32F-NEXT: ld a7, 24(a2) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB48_10 ; RV64ZVE32F-NEXT: 
.LBB48_13: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu @@ -4538,7 +4538,7 @@ ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 ; RV64ZVE32F-NEXT: ld t0, 0(t0) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB48_11 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu @@ -4549,7 +4549,7 @@ ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB48_15: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: andi t2, a5, 64 +; RV64ZVE32F-NEXT: andi t2, a6, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_18 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16 @@ -4557,15 +4557,15 @@ ; RV64ZVE32F-NEXT: slli t2, t2, 3 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: ld t2, 0(t2) -; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: bnez a5, .LBB48_19 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: bnez a6, .LBB48_19 ; RV64ZVE32F-NEXT: .LBB48_17: ; RV64ZVE32F-NEXT: ld a1, 56(a2) ; RV64ZVE32F-NEXT: j .LBB48_20 ; RV64ZVE32F-NEXT: .LBB48_18: ; RV64ZVE32F-NEXT: ld t2, 48(a2) -; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: beqz a5, .LBB48_17 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: beqz a6, .LBB48_17 ; RV64ZVE32F-NEXT: .LBB48_19: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4576,7 +4576,7 @@ ; RV64ZVE32F-NEXT: .LBB48_20: # %else20 ; RV64ZVE32F-NEXT: sd a3, 0(a0) ; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a6, 16(a0) +; RV64ZVE32F-NEXT: sd a5, 16(a0) ; RV64ZVE32F-NEXT: sd a7, 24(a0) ; RV64ZVE32F-NEXT: sd t0, 32(a0) ; RV64ZVE32F-NEXT: sd t1, 40(a0) @@ -4751,22 +4751,22 @@ ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a5, v0 -; RV64ZVE32F-NEXT: andi a3, a5, 1 +; RV64ZVE32F-NEXT: vmv.x.s a6, v0 +; RV64ZVE32F-NEXT: andi a3, a6, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB49_3 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB49_4 ; RV64ZVE32F-NEXT: .LBB49_2: ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: j .LBB49_5 ; RV64ZVE32F-NEXT: .LBB49_3: ; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: beqz a4, .LBB49_2 ; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu @@ -4777,20 +4777,20 @@ ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: .LBB49_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: andi a5, a6, 4 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: beqz a6, .LBB49_7 +; RV64ZVE32F-NEXT: beqz a5, .LBB49_7 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a6, v9 -; RV64ZVE32F-NEXT: slli a6, a6, 3 -; RV64ZVE32F-NEXT: add a6, a1, a6 -; RV64ZVE32F-NEXT: ld a6, 0(a6) +; RV64ZVE32F-NEXT: vmv.x.s a5, v9 +; RV64ZVE32F-NEXT: slli a5, a5, 3 +; RV64ZVE32F-NEXT: add a5, a1, a5 +; RV64ZVE32F-NEXT: ld a5, 0(a5) ; RV64ZVE32F-NEXT: j .LBB49_8 ; RV64ZVE32F-NEXT: .LBB49_7: -; RV64ZVE32F-NEXT: ld a6, 16(a2) +; RV64ZVE32F-NEXT: ld a5, 16(a2) ; RV64ZVE32F-NEXT: .LBB49_8: # 
%else5
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: andi a7, a6, 8
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
 ; RV64ZVE32F-NEXT: beqz a7, .LBB49_12
 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
@@ -4800,18 +4800,18 @@
 ; RV64ZVE32F-NEXT: slli a7, a7, 3
 ; RV64ZVE32F-NEXT: add a7, a1, a7
 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: bnez t0, .LBB49_13
 ; RV64ZVE32F-NEXT: .LBB49_10:
 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: bnez t1, .LBB49_14
 ; RV64ZVE32F-NEXT: .LBB49_11:
 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
 ; RV64ZVE32F-NEXT: j .LBB49_15
 ; RV64ZVE32F-NEXT: .LBB49_12:
 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: beqz t0, .LBB49_10
 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load10
 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
@@ -4819,7 +4819,7 @@
 ; RV64ZVE32F-NEXT: slli t0, t0, 3
 ; RV64ZVE32F-NEXT: add t0, a1, t0
 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: beqz t1, .LBB49_11
 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load13
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
@@ -4830,7 +4830,7 @@
 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
 ; RV64ZVE32F-NEXT: .LBB49_15: # %else14
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: andi t2, a5, 64
+; RV64ZVE32F-NEXT: andi t2, a6, 64
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_18
 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
@@ -4838,15 +4838,15 @@
 ; RV64ZVE32F-NEXT: slli t2, t2, 3
 ; RV64ZVE32F-NEXT: add t2, a1, t2
 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB49_19
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: bnez a6, .LBB49_19
 ; RV64ZVE32F-NEXT: .LBB49_17:
 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
 ; RV64ZVE32F-NEXT: j .LBB49_20
 ; RV64ZVE32F-NEXT: .LBB49_18:
 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB49_17
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: beqz a6, .LBB49_17
 ; RV64ZVE32F-NEXT: .LBB49_19: # %cond.load19
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4857,7 +4857,7 @@
 ; RV64ZVE32F-NEXT: .LBB49_20: # %else20
 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a6, 16(a0)
+; RV64ZVE32F-NEXT: sd a5, 16(a0)
 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
@@ -5321,8 +5321,8 @@
 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
 ; RV64ZVE32F: # %bb.0:
 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a5, v0
-; RV64ZVE32F-NEXT: andi a3, a5, 1
+; RV64ZVE32F-NEXT: vmv.x.s a6, v0
+; RV64ZVE32F-NEXT: andi a3, a6, 1
 ; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -5330,14 +5330,14 @@
 ; RV64ZVE32F-NEXT: slli a3, a3, 3
 ; RV64ZVE32F-NEXT: add a3, a1, a3
 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: andi a4, a6, 2
 ; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
 ; RV64ZVE32F-NEXT: .LBB51_2:
 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
 ; RV64ZVE32F-NEXT: j .LBB51_5
 ; RV64ZVE32F-NEXT: .LBB51_3:
 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: andi a4, a6, 2
 ; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
 ; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
@@ -5348,20 +5348,20 @@
 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
 ; RV64ZVE32F-NEXT: .LBB51_5: # %else2
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV64ZVE32F-NEXT: andi a6, a5, 4
+; RV64ZVE32F-NEXT: andi a5, a6, 4
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB51_7
+; RV64ZVE32F-NEXT: beqz a5, .LBB51_7
 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v9
-; RV64ZVE32F-NEXT: slli a6, a6, 3
-; RV64ZVE32F-NEXT: add a6, a1, a6
-; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: vmv.x.s a5, v9
+; RV64ZVE32F-NEXT: slli a5, a5, 3
+; RV64ZVE32F-NEXT: add a5, a1, a5
+; RV64ZVE32F-NEXT: ld a5, 0(a5)
 ; RV64ZVE32F-NEXT: j .LBB51_8
 ; RV64ZVE32F-NEXT: .LBB51_7:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: ld a5, 16(a2)
 ; RV64ZVE32F-NEXT: .LBB51_8: # %else5
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: andi a7, a6, 8
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
 ; RV64ZVE32F-NEXT: beqz a7, .LBB51_12
 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
@@ -5371,18 +5371,18 @@
 ; RV64ZVE32F-NEXT: slli a7, a7, 3
 ; RV64ZVE32F-NEXT: add a7, a1, a7
 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: bnez t0, .LBB51_13
 ; RV64ZVE32F-NEXT: .LBB51_10:
 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: bnez t1, .LBB51_14
 ; RV64ZVE32F-NEXT: .LBB51_11:
 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
 ; RV64ZVE32F-NEXT: j .LBB51_15
 ; RV64ZVE32F-NEXT: .LBB51_12:
 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: beqz t0, .LBB51_10
 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load10
 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu
@@ -5390,7 +5390,7 @@
 ; RV64ZVE32F-NEXT: slli t0, t0, 3
 ; RV64ZVE32F-NEXT: add t0, a1, t0
 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: beqz t1, .LBB51_11
 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load13
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
@@ -5401,7 +5401,7 @@
 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
 ; RV64ZVE32F-NEXT: .LBB51_15: # %else14
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV64ZVE32F-NEXT: andi t2, a5, 64
+; RV64ZVE32F-NEXT: andi t2, a6, 64
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
 ; RV64ZVE32F-NEXT: beqz t2, .LBB51_18
 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
@@ -5409,15 +5409,15 @@
 ; RV64ZVE32F-NEXT: slli t2, t2, 3
 ; RV64ZVE32F-NEXT: add t2, a1, t2
 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB51_19
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: bnez a6, .LBB51_19
 ; RV64ZVE32F-NEXT: .LBB51_17:
 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
 ; RV64ZVE32F-NEXT: j .LBB51_20
 ; RV64ZVE32F-NEXT: .LBB51_18:
 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB51_17
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: beqz a6, .LBB51_17
 ; RV64ZVE32F-NEXT: .LBB51_19: # %cond.load19
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5428,7 +5428,7 @@
 ; RV64ZVE32F-NEXT: .LBB51_20: # %else20
 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a6, 16(a0)
+; RV64ZVE32F-NEXT: sd a5, 16(a0)
 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
@@ -5603,8 +5603,8 @@
 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
 ; RV64ZVE32F: # %bb.0:
 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a5, v0
-; RV64ZVE32F-NEXT: andi a3, a5, 1
+; RV64ZVE32F-NEXT: vmv.x.s a6, v0
+; RV64ZVE32F-NEXT: andi a3, a6, 1
 ; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -5612,14 +5612,14 @@
 ; RV64ZVE32F-NEXT: slli a3, a3, 3
 ; RV64ZVE32F-NEXT: add a3, a1, a3
 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: andi a4, a6, 2
 ; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
 ; RV64ZVE32F-NEXT: .LBB52_2:
 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
 ; RV64ZVE32F-NEXT: j .LBB52_5
 ; RV64ZVE32F-NEXT: .LBB52_3:
 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: andi a4, a6, 2
 ; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
 ; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
@@ -5630,20 +5630,20 @@
 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
 ; RV64ZVE32F-NEXT: .LBB52_5: # %else2
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV64ZVE32F-NEXT: andi a6, a5, 4
+; RV64ZVE32F-NEXT: andi a5, a6, 4
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB52_7
+; RV64ZVE32F-NEXT: beqz a5, .LBB52_7
 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v9
-; RV64ZVE32F-NEXT: slli a6, a6, 3
-; RV64ZVE32F-NEXT: add a6, a1, a6
-; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: vmv.x.s a5, v9
+; RV64ZVE32F-NEXT: slli a5, a5, 3
+; RV64ZVE32F-NEXT: add a5, a1, a5
+; RV64ZVE32F-NEXT: ld a5, 0(a5)
 ; RV64ZVE32F-NEXT: j .LBB52_8
 ; RV64ZVE32F-NEXT: .LBB52_7:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: ld a5, 16(a2)
 ; RV64ZVE32F-NEXT: .LBB52_8: # %else5
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: andi a7, a6, 8
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
 ; RV64ZVE32F-NEXT: beqz a7, .LBB52_12
 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
@@ -5653,18 +5653,18 @@
 ; RV64ZVE32F-NEXT: slli a7, a7, 3
 ; RV64ZVE32F-NEXT: add a7, a1, a7
 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: bnez t0, .LBB52_13
 ; RV64ZVE32F-NEXT: .LBB52_10:
 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: bnez t1, .LBB52_14
 ; RV64ZVE32F-NEXT: .LBB52_11:
 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
 ; RV64ZVE32F-NEXT: j .LBB52_15
 ; RV64ZVE32F-NEXT: .LBB52_12:
 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: andi t0, a6, 16
 ; RV64ZVE32F-NEXT: beqz t0, .LBB52_10
 ; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load10
 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu
@@ -5672,7 +5672,7 @@
 ; RV64ZVE32F-NEXT: slli t0, t0, 3
 ; RV64ZVE32F-NEXT: add t0, a1, t0
 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: andi t1, a6, 32
 ; RV64ZVE32F-NEXT: beqz t1, .LBB52_11
 ; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load13
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
@@ -5683,7 +5683,7 @@
 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
 ; RV64ZVE32F-NEXT: .LBB52_15: # %else14
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV64ZVE32F-NEXT: andi t2, a5, 64
+; RV64ZVE32F-NEXT: andi t2, a6, 64
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
 ; RV64ZVE32F-NEXT: beqz t2, .LBB52_18
 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
@@ -5691,15 +5691,15 @@
 ; RV64ZVE32F-NEXT: slli t2, t2, 3
 ; RV64ZVE32F-NEXT: add t2, a1, t2
 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB52_19
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: bnez a6, .LBB52_19
 ; RV64ZVE32F-NEXT: .LBB52_17:
 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
 ; RV64ZVE32F-NEXT: j .LBB52_20
 ; RV64ZVE32F-NEXT: .LBB52_18:
 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB52_17
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: beqz a6, .LBB52_17
 ; RV64ZVE32F-NEXT: .LBB52_19: # %cond.load19
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5710,7 +5710,7 @@
 ; RV64ZVE32F-NEXT: .LBB52_20: # %else20
 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a6, 16(a0)
+; RV64ZVE32F-NEXT: sd a5, 16(a0)
 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -102,23 +102,23 @@
 define void @test6(i32* nocapture readonly %A, i32* nocapture %B, i64 %n) {
 ; CHECK-LABEL: test6:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a6, a2, e32, m1, ta, mu
-; CHECK-NEXT: beqz a6, .LBB5_3
+; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu
+; CHECK-NEXT: beqz a3, .LBB5_3
 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
 ; CHECK-NEXT: li a4, 0
 ; CHECK-NEXT: .LBB5_2: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: slli a5, a4, 2
-; CHECK-NEXT: add a3, a0, a5
-; CHECK-NEXT: vle32.v v8, (a3)
+; CHECK-NEXT: slli a6, a4, 2
+; CHECK-NEXT: add a5, a0, a6
+; CHECK-NEXT: vle32.v v8, (a5)
 ; CHECK-NEXT: vmsle.vi v9, v8, -3
 ; CHECK-NEXT: vmsgt.vi v10, v8, 2
 ; CHECK-NEXT: vmor.mm v0, v9, v10
-; CHECK-NEXT: add a3, a1, a5
-; CHECK-NEXT: vse32.v v8, (a3), v0.t
-; CHECK-NEXT: add a4, a4, a6
-; CHECK-NEXT: vsetvli a6, a2, e32, m1, ta, mu
-; CHECK-NEXT: bnez a6, .LBB5_2
+; CHECK-NEXT: add a5, a1, a6
+; CHECK-NEXT: vse32.v v8, (a5), v0.t
+; CHECK-NEXT: add a4, a4, a3
+; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu
+; CHECK-NEXT: bnez a3, .LBB5_2
 ; CHECK-NEXT: .LBB5_3: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
--- a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
+++ b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
@@ -161,18 +161,18 @@
 ; RV32I-LABEL: sll_redundant_mask_zeros_i64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: andi a3, a2, 60
-; RV32I-NEXT: addi a4, a3, -32
-; RV32I-NEXT: bltz a4, .LBB9_2
+; RV32I-NEXT: andi a4, a2, 60
+; RV32I-NEXT: addi a3, a4, -32
+; RV32I-NEXT: bltz a3, .LBB9_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, a4
+; RV32I-NEXT: sll a1, a0, a3
 ; RV32I-NEXT: li a0, 0
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB9_2:
 ; RV32I-NEXT: sll a1, a1, a2
-; RV32I-NEXT: srli a4, a0, 1
-; RV32I-NEXT: xori a3, a3, 31
-; RV32I-NEXT: srl a3, a4, a3
+; RV32I-NEXT: srli a3, a0, 1
+; RV32I-NEXT: xori a4, a4, 31
+; RV32I-NEXT: srl a3, a3, a4
 ; RV32I-NEXT: or a1, a1, a3
 ; RV32I-NEXT: sll a0, a0, a2
 ; RV32I-NEXT: ret
@@ -192,18 +192,18 @@
 ; RV32I-LABEL: srl_redundant_mask_zeros_i64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: slli a2, a2, 3
-; RV32I-NEXT: andi a3, a2, 56
-; RV32I-NEXT: addi a4, a3, -32
-; RV32I-NEXT: bltz a4, .LBB10_2
+; RV32I-NEXT: andi a4, a2, 56
+; RV32I-NEXT: addi a3, a4, -32
+; RV32I-NEXT: bltz a3, .LBB10_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a1, a4
+; RV32I-NEXT: srl a0, a1, a3
 ; RV32I-NEXT: li a1, 0
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB10_2:
 ; RV32I-NEXT: srl a0, a0, a2
-; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: xori a3, a3, 31
-; RV32I-NEXT: sll a3, a4, a3
+; RV32I-NEXT: slli a3, a1, 1
+; RV32I-NEXT: xori a4, a4, 31
+; RV32I-NEXT: sll a3, a3, a4
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: srl a1, a1, a2
 ; RV32I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -552,24 +552,24 @@
 define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
 ; RV32I-LABEL: fshr64_minsize:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a4, a2, 32
+; RV32I-NEXT: andi a5, a2, 32
 ; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: beqz a4, .LBB9_2
+; RV32I-NEXT: beqz a5, .LBB9_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a1
 ; RV32I-NEXT: .LBB9_2:
-; RV32I-NEXT: srl a5, a3, a2
-; RV32I-NEXT: beqz a4, .LBB9_4
+; RV32I-NEXT: srl a4, a3, a2
+; RV32I-NEXT: beqz a5, .LBB9_4
 ; RV32I-NEXT: # %bb.3:
 ; RV32I-NEXT: mv a1, a0
 ; RV32I-NEXT: .LBB9_4:
 ; RV32I-NEXT: slli a0, a1, 1
-; RV32I-NEXT: not a4, a2
-; RV32I-NEXT: sll a0, a0, a4
-; RV32I-NEXT: or a0, a0, a5
+; RV32I-NEXT: not a5, a2
+; RV32I-NEXT: sll a0, a0, a5
+; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: srl a1, a1, a2
 ; RV32I-NEXT: slli a2, a3, 1
-; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: sll a2, a2, a5
 ; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: ret
 ;
@@ -588,56 +588,56 @@
 ; RV32I-LABEL: fshr128_minsize:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: lw a3, 8(a1)
-; RV32I-NEXT: lw t2, 0(a1)
+; RV32I-NEXT: lw t1, 0(a1)
 ; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: lw t0, 4(a1)
 ; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: andi t1, a2, 64
-; RV32I-NEXT: mv t0, a7
-; RV32I-NEXT: mv a4, t2
-; RV32I-NEXT: beqz t1, .LBB10_2
+; RV32I-NEXT: andi t2, a2, 64
+; RV32I-NEXT: mv a7, t0
+; RV32I-NEXT: mv a4, t1
+; RV32I-NEXT: beqz t2, .LBB10_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv t0, a1
+; RV32I-NEXT: mv a7, a1
 ; RV32I-NEXT: mv a4, a3
 ; RV32I-NEXT: .LBB10_2:
 ; RV32I-NEXT: andi a6, a2, 32
 ; RV32I-NEXT: mv a5, a4
 ; RV32I-NEXT: bnez a6, .LBB10_13
 ; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: bnez t1, .LBB10_14
+; RV32I-NEXT: bnez t2, .LBB10_14
 ; RV32I-NEXT: .LBB10_4:
 ; RV32I-NEXT: beqz a6, .LBB10_6
 ; RV32I-NEXT: .LBB10_5:
-; RV32I-NEXT: mv t0, a3
+; RV32I-NEXT: mv a7, a3
 ; RV32I-NEXT: .LBB10_6:
-; RV32I-NEXT: slli t3, t0, 1
-; RV32I-NEXT: not t2, a2
-; RV32I-NEXT: beqz t1, .LBB10_8
+; RV32I-NEXT: slli t3, a7, 1
+; RV32I-NEXT: not t1, a2
+; RV32I-NEXT: beqz t2, .LBB10_8
 ; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: mv a1, a7
+; RV32I-NEXT: mv a1, t0
 ; RV32I-NEXT: .LBB10_8:
-; RV32I-NEXT: srl a7, a5, a2
-; RV32I-NEXT: sll t1, t3, t2
-; RV32I-NEXT: srl t0, t0, a2
+; RV32I-NEXT: srl t2, a5, a2
+; RV32I-NEXT: sll t3, t3, t1
+; RV32I-NEXT: srl t0, a7, a2
 ; RV32I-NEXT: beqz a6, .LBB10_10
 ; RV32I-NEXT: # %bb.9:
 ; RV32I-NEXT: mv a3, a1
 ; RV32I-NEXT: .LBB10_10:
-; RV32I-NEXT: or a7, t1, a7
-; RV32I-NEXT: slli t1, a3, 1
-; RV32I-NEXT: sll t1, t1, t2
-; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: or a7, t3, t2
+; RV32I-NEXT: slli t2, a3, 1
+; RV32I-NEXT: sll t2, t2, t1
+; RV32I-NEXT: or t0, t2, t0
 ; RV32I-NEXT: srl a3, a3, a2
 ; RV32I-NEXT: beqz a6, .LBB10_12
 ; RV32I-NEXT: # %bb.11:
 ; RV32I-NEXT: mv a1, a4
 ; RV32I-NEXT: .LBB10_12:
 ; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: sll a4, a4, t2
+; RV32I-NEXT: sll a4, a4, t1
 ; RV32I-NEXT: or a3, a4, a3
 ; RV32I-NEXT: srl a1, a1, a2
 ; RV32I-NEXT: slli a2, a5, 1
-; RV32I-NEXT: sll a2, a2, t2
+; RV32I-NEXT: sll a2, a2, t1
 ; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: sw a1, 12(a0)
 ; RV32I-NEXT: sw a3, 8(a0)
@@ -645,33 +645,33 @@
 ; RV32I-NEXT: sw a7, 0(a0)
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB10_13:
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz t1, .LBB10_4
+; RV32I-NEXT: mv a5, a7
+; RV32I-NEXT: beqz t2, .LBB10_4
 ; RV32I-NEXT: .LBB10_14:
-; RV32I-NEXT: mv a3, t2
+; RV32I-NEXT: mv a3, t1
 ; RV32I-NEXT: bnez a6, .LBB10_5
 ; RV32I-NEXT: j .LBB10_6
 ;
 ; RV64I-LABEL: fshr128_minsize:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a4, a2, 64
+; RV64I-NEXT: andi a5, a2, 64
 ; RV64I-NEXT: mv a3, a0
-; RV64I-NEXT: beqz a4, .LBB10_2
+; RV64I-NEXT: beqz a5, .LBB10_2
 ; RV64I-NEXT: # %bb.1:
 ; RV64I-NEXT: mv a3, a1
 ; RV64I-NEXT: .LBB10_2:
-; RV64I-NEXT: srl a5, a3, a2
-; RV64I-NEXT: beqz a4, .LBB10_4
+; RV64I-NEXT: srl a4, a3, a2
+; RV64I-NEXT: beqz a5, .LBB10_4
 ; RV64I-NEXT: # %bb.3:
 ; RV64I-NEXT: mv a1, a0
 ; RV64I-NEXT: .LBB10_4:
 ; RV64I-NEXT: slli a0, a1, 1
-; RV64I-NEXT: not a4, a2
-; RV64I-NEXT: sll a0, a0, a4
-; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: not a5, a2
+; RV64I-NEXT: sll a0, a0, a5
+; RV64I-NEXT: or a0, a0, a4
 ; RV64I-NEXT: srl a1, a1, a2
 ; RV64I-NEXT: slli a2, a3, 1
-; RV64I-NEXT: sll a2, a2, a4
+; RV64I-NEXT: sll a2, a2, a5
 ; RV64I-NEXT: or a1, a2, a1
 ; RV64I-NEXT: ret
 %res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)