diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -66,6 +66,8 @@ void getOffsetOpcodes(const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const override; + + unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override; }; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -347,3 +347,10 @@ Ops.push_back(dwarf::DW_OP_minus); } } + +// Returns the CostPerUse table index for MF: 1 (non-GPRC registers penalized) +// when the C extension is enabled, 0 otherwise. +unsigned +RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const { + return MF.getSubtarget<RISCVSubtarget>().hasStdExtC() ? 1 : 0; +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -73,12 +73,11 @@ // are not part of GPRC, the most restrictive register class used by the // compressed instruction set. This will influence the greedy register // allocator to reduce the use of registers that can't be encoded in 16 bit -// instructions. This affects register allocation even when compressed -// instruction isn't targeted, we see no major negative codegen impact. +// instructions.
let RegAltNameIndices = [ABIRegAltName] in { def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; - let CostPerUse = [1] in { + let CostPerUse = [0, 1] in { def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; @@ -95,7 +94,7 @@ def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; - let CostPerUse = [1] in { + let CostPerUse = [0, 1] in { def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -163,23 +163,23 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a4, 12(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: srli a5, a2, 29 -; RV32I-NEXT: slli a4, a3, 3 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a3, 3 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: srli a3, a3, 29 -; RV32I-NEXT: slli a5, a1, 3 -; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: slli a6, a1, 3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: srli a1, a1, 29 -; RV32I-NEXT: slli a5, a6, 3 -; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: slli a4, a4, 3 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: slli a2, a2, 3 -; RV32I-NEXT: lui a5, 128 -; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: lui a4, 128 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 8(a0) -; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a5, 4(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: jalr zero, 0(ra) ; diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll --- 
a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -10,19 +10,19 @@ ; RISCV32-LABEL: addcarry: ; RISCV32: # %bb.0: ; RISCV32-NEXT: mul a4, a0, a3 -; RISCV32-NEXT: mulhu a7, a0, a2 -; RISCV32-NEXT: add a4, a7, a4 -; RISCV32-NEXT: mul a5, a1, a2 -; RISCV32-NEXT: add a6, a4, a5 -; RISCV32-NEXT: sltu t0, a6, a4 -; RISCV32-NEXT: sltu a4, a4, a7 -; RISCV32-NEXT: mulhu a5, a0, a3 -; RISCV32-NEXT: add a4, a5, a4 -; RISCV32-NEXT: mulhu a5, a1, a2 -; RISCV32-NEXT: add a4, a4, a5 -; RISCV32-NEXT: add a4, a4, t0 -; RISCV32-NEXT: mul a5, a1, a3 -; RISCV32-NEXT: add a5, a4, a5 +; RISCV32-NEXT: mulhu a5, a0, a2 +; RISCV32-NEXT: add a6, a5, a4 +; RISCV32-NEXT: mul a4, a1, a2 +; RISCV32-NEXT: add a4, a6, a4 +; RISCV32-NEXT: sltu a7, a4, a6 +; RISCV32-NEXT: sltu a5, a6, a5 +; RISCV32-NEXT: mulhu a6, a0, a3 +; RISCV32-NEXT: add a5, a6, a5 +; RISCV32-NEXT: mulhu a6, a1, a2 +; RISCV32-NEXT: add a5, a5, a6 +; RISCV32-NEXT: add a5, a5, a7 +; RISCV32-NEXT: mul a6, a1, a3 +; RISCV32-NEXT: add a5, a5, a6 ; RISCV32-NEXT: bgez a1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: ; RISCV32-NEXT: sub a5, a5, a2 @@ -32,9 +32,9 @@ ; RISCV32-NEXT: sub a5, a5, a0 ; RISCV32-NEXT: .LBB0_4: ; RISCV32-NEXT: slli a1, a5, 30 -; RISCV32-NEXT: srli a3, a6, 2 +; RISCV32-NEXT: srli a3, a4, 2 ; RISCV32-NEXT: or a1, a1, a3 -; RISCV32-NEXT: slli a3, a6, 30 +; RISCV32-NEXT: slli a3, a4, 30 ; RISCV32-NEXT: mul a0, a0, a2 ; RISCV32-NEXT: srli a0, a0, 2 ; RISCV32-NEXT: or a0, a3, a0 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2008,9 +2008,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB35_2 ; RV32I-NEXT: .LBB35_1: # %atomicrmw.start ; RV32I-NEXT: 
# in Loop: Header=BB35_2 Depth=1 @@ -2027,10 +2027,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB35_1 +; RV32I-NEXT: blt s2, a0, .LBB35_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB35_1 ; RV32I-NEXT: .LBB35_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2043,30 +2043,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB35_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB35_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB35_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB35_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2080,9 +2080,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: 
mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 @@ -2099,10 +2099,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB35_1 +; RV64I-NEXT: blt s2, a0, .LBB35_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2115,30 +2115,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB35_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB35_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB35_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB35_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, 
a5, a0 ; RV64IA-NEXT: ret @@ -2156,9 +2156,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB36_2 ; RV32I-NEXT: .LBB36_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 @@ -2175,10 +2175,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB36_1 +; RV32I-NEXT: blt s2, a0, .LBB36_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB36_1 ; RV32I-NEXT: .LBB36_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2191,30 +2191,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB36_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB36_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 
Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB36_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB36_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2228,9 +2228,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB36_2 ; RV64I-NEXT: .LBB36_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 @@ -2247,10 +2247,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB36_1 +; RV64I-NEXT: blt s2, a0, .LBB36_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB36_1 ; RV64I-NEXT: .LBB36_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2263,30 +2263,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB36_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB36_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; 
RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB36_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB36_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2304,9 +2304,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB37_2 ; RV32I-NEXT: .LBB37_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 @@ -2323,10 +2323,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB37_1 +; RV32I-NEXT: blt s2, a0, .LBB37_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB37_1 ; RV32I-NEXT: .LBB37_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2339,30 +2339,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB37_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; 
RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB37_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB37_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB37_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2376,9 +2376,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB37_2 ; RV64I-NEXT: .LBB37_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 @@ -2395,10 +2395,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB37_1 +; RV64I-NEXT: blt s2, a0, .LBB37_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB37_1 ; RV64I-NEXT: .LBB37_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2411,30 +2411,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, 
a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB37_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB37_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB37_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB37_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2452,9 +2452,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB38_2 ; RV32I-NEXT: .LBB38_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 @@ -2471,10 +2471,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB38_1 +; RV32I-NEXT: blt s2, a0, .LBB38_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB38_1 ; RV32I-NEXT: .LBB38_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2487,30 +2487,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; 
RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB38_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB38_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB38_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB38_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2524,9 +2524,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB38_2 ; RV64I-NEXT: .LBB38_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 @@ -2543,10 +2543,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB38_1 +; RV64I-NEXT: blt s2, a0, .LBB38_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB38_1 ; RV64I-NEXT: .LBB38_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2559,30 +2559,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi 
a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB38_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB38_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB38_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB38_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2600,9 +2600,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB39_2 ; RV32I-NEXT: .LBB39_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 @@ -2619,10 +2619,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB39_1 +; RV32I-NEXT: blt s2, a0, .LBB39_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB39_1 ; RV32I-NEXT: .LBB39_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ 
-2635,30 +2635,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB39_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB39_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB39_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB39_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2672,9 +2672,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB39_2 ; RV64I-NEXT: .LBB39_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 @@ -2691,10 +2691,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB39_1 +; RV64I-NEXT: blt s2, a0, .LBB39_1 ; RV64I-NEXT: # %bb.3: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB39_1 ; RV64I-NEXT: .LBB39_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2707,30 +2707,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB39_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB39_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB39_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB39_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2748,9 +2748,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB40_2 ; RV32I-NEXT: .LBB40_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 
@@ -2767,10 +2767,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB40_1 +; RV32I-NEXT: bge s2, a0, .LBB40_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB40_1 ; RV32I-NEXT: .LBB40_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2783,30 +2783,30 @@ ; ; RV32IA-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB40_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB40_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB40_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB40_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2820,9 +2820,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, 
a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB40_2 ; RV64I-NEXT: .LBB40_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 @@ -2839,10 +2839,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB40_1 +; RV64I-NEXT: bge s2, a0, .LBB40_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB40_1 ; RV64I-NEXT: .LBB40_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2855,30 +2855,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB40_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB40_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB40_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB40_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ 
-2896,9 +2896,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB41_2 ; RV32I-NEXT: .LBB41_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 @@ -2915,10 +2915,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB41_1 +; RV32I-NEXT: bge s2, a0, .LBB41_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB41_1 ; RV32I-NEXT: .LBB41_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2931,30 +2931,30 @@ ; ; RV32IA-LABEL: atomicrmw_min_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB41_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB41_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: sc.w a2, 
a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB41_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB41_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2968,9 +2968,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB41_2 ; RV64I-NEXT: .LBB41_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 @@ -2987,10 +2987,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB41_1 +; RV64I-NEXT: bge s2, a0, .LBB41_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB41_1 ; RV64I-NEXT: .LBB41_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3003,30 +3003,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB41_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB41_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: 
xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB41_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB41_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3044,9 +3044,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB42_2 ; RV32I-NEXT: .LBB42_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 @@ -3063,10 +3063,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB42_1 +; RV32I-NEXT: bge s2, a0, .LBB42_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB42_1 ; RV32I-NEXT: .LBB42_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3079,30 +3079,30 @@ ; ; RV32IA-LABEL: atomicrmw_min_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB42_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra 
a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB42_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB42_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB42_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3116,9 +3116,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB42_2 ; RV64I-NEXT: .LBB42_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 @@ -3135,10 +3135,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB42_1 +; RV64I-NEXT: bge s2, a0, .LBB42_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB42_1 ; RV64I-NEXT: .LBB42_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3151,30 +3151,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: 
sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB42_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB42_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB42_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB42_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3192,9 +3192,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3211,10 +3211,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB43_1 +; RV32I-NEXT: bge s2, a0, .LBB43_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB43_1 ; RV32I-NEXT: .LBB43_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3227,30 +3227,30 @@ ; ; RV32IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; 
RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB43_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB43_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB43_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB43_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3264,9 +3264,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB43_2 ; RV64I-NEXT: .LBB43_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3283,10 +3283,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB43_1 +; RV64I-NEXT: bge s2, a0, .LBB43_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB43_1 ; RV64I-NEXT: .LBB43_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3299,30 +3299,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: 
sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB43_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB43_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB43_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB43_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3340,9 +3340,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3359,10 +3359,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB44_1 +; RV32I-NEXT: bge s2, a0, .LBB44_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB44_1 ; RV32I-NEXT: .LBB44_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3375,30 +3375,30 @@ ; ; RV32IA-LABEL: 
atomicrmw_min_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB44_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB44_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB44_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB44_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3412,9 +3412,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB44_2 ; RV64I-NEXT: .LBB44_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3431,10 +3431,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB44_1 +; RV64I-NEXT: bge s2, a0, .LBB44_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: 
Header=BB44_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB44_1 ; RV64I-NEXT: .LBB44_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3447,30 +3447,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB44_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB44_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB44_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB44_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3488,8 +3488,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3505,10 +3505,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: 
Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB45_1 +; RV32I-NEXT: bltu s2, a0, .LBB45_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3521,23 +3521,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB45_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB45_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB45_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3552,8 +3552,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB45_2 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3569,10 +3569,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB45_1 +; RV64I-NEXT: bltu s2, a0, .LBB45_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv 
a2, s1 ; RV64I-NEXT: j .LBB45_1 ; RV64I-NEXT: .LBB45_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3585,23 +3585,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB45_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB45_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB45_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -3620,8 +3620,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3637,10 +3637,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB46_1 +; RV32I-NEXT: bltu s2, a0, .LBB46_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3653,23 +3653,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: 
slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB46_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB46_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB46_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3684,8 +3684,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB46_2 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3701,10 +3701,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB46_1 +; RV64I-NEXT: bltu s2, a0, .LBB46_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB46_1 ; RV64I-NEXT: .LBB46_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3717,23 +3717,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; 
RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB46_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB46_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB46_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -3752,8 +3752,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB47_2 ; RV32I-NEXT: .LBB47_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 @@ -3769,10 +3769,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB47_1 +; RV32I-NEXT: bltu s2, a0, .LBB47_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB47_1 ; RV32I-NEXT: .LBB47_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3785,23 +3785,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB47_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB47_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and 
a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB47_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3816,8 +3816,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB47_2 ; RV64I-NEXT: .LBB47_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 @@ -3833,10 +3833,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB47_1 +; RV64I-NEXT: bltu s2, a0, .LBB47_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB47_1 ; RV64I-NEXT: .LBB47_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3849,23 +3849,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB47_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB47_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB47_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 
@@ -3884,8 +3884,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB48_2 ; RV32I-NEXT: .LBB48_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 @@ -3901,10 +3901,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB48_1 +; RV32I-NEXT: bltu s2, a0, .LBB48_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB48_1 ; RV32I-NEXT: .LBB48_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3917,23 +3917,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB48_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB48_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB48_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3948,8 +3948,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j 
.LBB48_2 ; RV64I-NEXT: .LBB48_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 @@ -3965,10 +3965,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB48_1 +; RV64I-NEXT: bltu s2, a0, .LBB48_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB48_1 ; RV64I-NEXT: .LBB48_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3981,23 +3981,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB48_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB48_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB48_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4016,8 +4016,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB49_2 ; RV32I-NEXT: .LBB49_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 @@ -4033,10 +4033,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, 
a0, .LBB49_1 +; RV32I-NEXT: bltu s2, a0, .LBB49_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB49_1 ; RV32I-NEXT: .LBB49_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4049,23 +4049,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB49_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB49_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB49_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4080,8 +4080,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB49_2 ; RV64I-NEXT: .LBB49_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 @@ -4097,10 +4097,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB49_1 +; RV64I-NEXT: bltu s2, a0, .LBB49_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB49_1 ; RV64I-NEXT: .LBB49_4: # 
%atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4113,23 +4113,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB49_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB49_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB49_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4148,8 +4148,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB50_2 ; RV32I-NEXT: .LBB50_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 @@ -4165,10 +4165,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB50_1 +; RV32I-NEXT: bgeu s2, a0, .LBB50_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB50_1 ; RV32I-NEXT: .LBB50_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4181,23 +4181,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 
255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB50_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB50_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB50_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4212,8 +4212,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB50_2 ; RV64I-NEXT: .LBB50_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 @@ -4229,10 +4229,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB50_1 +; RV64I-NEXT: bgeu s2, a0, .LBB50_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB50_1 ; RV64I-NEXT: .LBB50_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4245,23 +4245,23 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: 
and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB50_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB50_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB50_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4280,8 +4280,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB51_2 ; RV32I-NEXT: .LBB51_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 @@ -4297,10 +4297,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB51_1 +; RV32I-NEXT: bgeu s2, a0, .LBB51_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB51_1 ; RV32I-NEXT: .LBB51_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4313,23 +4313,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB51_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB51_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 
; RV32IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB51_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4344,8 +4344,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB51_2 ; RV64I-NEXT: .LBB51_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 @@ -4361,10 +4361,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB51_1 +; RV64I-NEXT: bgeu s2, a0, .LBB51_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB51_1 ; RV64I-NEXT: .LBB51_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4377,23 +4377,23 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB51_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB51_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB51_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4412,8 +4412,8 @@ ; RV32I-NEXT: sw s2, 
16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB52_2 ; RV32I-NEXT: .LBB52_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 @@ -4429,10 +4429,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB52_1 +; RV32I-NEXT: bgeu s2, a0, .LBB52_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB52_1 ; RV32I-NEXT: .LBB52_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4445,23 +4445,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB52_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB52_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB52_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4476,8 +4476,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB52_2 ; RV64I-NEXT: .LBB52_1: # %atomicrmw.start ; 
RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 @@ -4493,10 +4493,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB52_1 +; RV64I-NEXT: bgeu s2, a0, .LBB52_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB52_1 ; RV64I-NEXT: .LBB52_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4509,23 +4509,23 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB52_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB52_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB52_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4544,8 +4544,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB53_2 ; RV32I-NEXT: .LBB53_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 @@ -4561,10 +4561,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB53_1 +; RV32I-NEXT: bgeu s2, a0, .LBB53_1 ; 
RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB53_1 ; RV32I-NEXT: .LBB53_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4577,23 +4577,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB53_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB53_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB53_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4608,8 +4608,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB53_2 ; RV64I-NEXT: .LBB53_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 @@ -4625,10 +4625,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB53_1 +; RV64I-NEXT: bgeu s2, a0, .LBB53_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB53_1 ; RV64I-NEXT: .LBB53_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4641,23 +4641,23 @@ ; ; 
RV64IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB53_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB53_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB53_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4676,8 +4676,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB54_2 ; RV32I-NEXT: .LBB54_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 @@ -4693,10 +4693,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB54_1 +; RV32I-NEXT: bgeu s2, a0, .LBB54_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB54_1 ; RV32I-NEXT: .LBB54_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4709,23 +4709,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; 
RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB54_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB54_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB54_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4740,8 +4740,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB54_2 ; RV64I-NEXT: .LBB54_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 @@ -4757,10 +4757,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB54_1 +; RV64I-NEXT: bgeu s2, a0, .LBB54_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB54_1 ; RV64I-NEXT: .LBB54_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4773,23 +4773,23 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 
-; RV64IA-NEXT: bgeu a1, a2, .LBB54_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB54_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB54_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -6868,9 +6868,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB90_2 ; RV32I-NEXT: .LBB90_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 @@ -6887,10 +6887,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB90_1 +; RV32I-NEXT: blt s2, a0, .LBB90_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB90_1 ; RV32I-NEXT: .LBB90_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -6903,31 +6903,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB90_3 +; RV32IA-NEXT: lr.w a5, (a2) +; 
RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB90_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB90_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB90_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -6941,9 +6941,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB90_2 ; RV64I-NEXT: .LBB90_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 @@ -6960,10 +6960,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB90_1 +; RV64I-NEXT: blt s2, a0, .LBB90_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB90_1 ; RV64I-NEXT: .LBB90_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -6976,31 +6976,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 
-; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB90_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB90_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB90_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB90_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7018,9 +7018,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB91_2 ; RV32I-NEXT: .LBB91_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 @@ -7037,10 +7037,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB91_1 +; RV32I-NEXT: blt s2, a0, .LBB91_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB91_1 ; RV32I-NEXT: .LBB91_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7053,31 +7053,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, 
a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB91_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB91_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB91_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB91_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7091,9 +7091,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB91_2 ; RV64I-NEXT: .LBB91_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB91_2 Depth=1 @@ -7110,10 +7110,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB91_1 +; RV64I-NEXT: blt s2, a0, .LBB91_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB91_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB91_1 ; RV64I-NEXT: .LBB91_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7126,31 +7126,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: 
andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB91_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB91_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB91_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB91_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7168,9 +7168,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB92_2 ; RV32I-NEXT: .LBB92_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 @@ -7187,10 +7187,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB92_1 +; RV32I-NEXT: blt s2, a0, .LBB92_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 -; RV32I-NEXT: mv 
a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB92_1 ; RV32I-NEXT: .LBB92_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7203,31 +7203,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB92_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB92_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB92_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB92_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7241,9 +7241,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB92_2 ; RV64I-NEXT: .LBB92_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 @@ -7260,10 +7260,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: 
srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB92_1 +; RV64I-NEXT: blt s2, a0, .LBB92_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB92_1 ; RV64I-NEXT: .LBB92_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7276,31 +7276,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB92_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB92_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB92_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB92_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7318,9 +7318,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai 
s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB93_2 ; RV32I-NEXT: .LBB93_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 @@ -7337,10 +7337,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB93_1 +; RV32I-NEXT: blt s2, a0, .LBB93_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB93_1 ; RV32I-NEXT: .LBB93_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7353,31 +7353,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB93_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB93_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB93_3: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB93_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB93_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret 
@@ -7391,9 +7391,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB93_2 ; RV64I-NEXT: .LBB93_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 @@ -7410,10 +7410,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB93_1 +; RV64I-NEXT: blt s2, a0, .LBB93_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB93_1 ; RV64I-NEXT: .LBB93_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7426,31 +7426,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB93_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB93_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB93_3: # in Loop: 
Header=BB93_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB93_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB93_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7468,9 +7468,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB94_2 ; RV32I-NEXT: .LBB94_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 @@ -7487,10 +7487,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB94_1 +; RV32I-NEXT: blt s2, a0, .LBB94_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB94_1 ; RV32I-NEXT: .LBB94_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7503,31 +7503,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB94_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB94_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 
Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB94_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB94_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7541,9 +7541,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB94_2 ; RV64I-NEXT: .LBB94_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 @@ -7560,10 +7560,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB94_1 +; RV64I-NEXT: blt s2, a0, .LBB94_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB94_1 ; RV64I-NEXT: .LBB94_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7576,31 +7576,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB94_3 +; 
RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB94_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB94_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB94_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7618,9 +7618,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB95_2 ; RV32I-NEXT: .LBB95_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 @@ -7637,10 +7637,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB95_1 +; RV32I-NEXT: bge s2, a0, .LBB95_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB95_1 ; RV32I-NEXT: .LBB95_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7653,31 +7653,31 @@ ; ; RV32IA-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: 
.LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB95_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB95_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB95_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB95_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7691,9 +7691,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB95_2 ; RV64I-NEXT: .LBB95_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 @@ -7710,10 +7710,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB95_1 +; RV64I-NEXT: bge s2, a0, .LBB95_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB95_1 ; RV64I-NEXT: .LBB95_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7726,31 +7726,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; 
RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB95_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB95_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB95_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB95_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7768,9 +7768,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB96_2 ; RV32I-NEXT: .LBB96_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 @@ -7787,10 +7787,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB96_1 +; RV32I-NEXT: bge s2, a0, .LBB96_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB96_1 ; RV32I-NEXT: .LBB96_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7803,31 +7803,31 @@ ; ; RV32IA-LABEL: 
atomicrmw_min_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB96_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB96_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB96_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB96_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7841,9 +7841,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB96_2 ; RV64I-NEXT: .LBB96_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 @@ -7860,10 +7860,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB96_1 +; RV64I-NEXT: bge s2, a0, .LBB96_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; 
RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB96_1 ; RV64I-NEXT: .LBB96_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7876,31 +7876,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB96_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB96_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB96_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB96_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7918,9 +7918,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB97_2 ; RV32I-NEXT: .LBB97_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 
@@ -7937,10 +7937,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB97_1 +; RV32I-NEXT: bge s2, a0, .LBB97_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB97_1 ; RV32I-NEXT: .LBB97_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7953,31 +7953,31 @@ ; ; RV32IA-LABEL: atomicrmw_min_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB97_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB97_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB97_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB97_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7991,9 +7991,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; 
RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB97_2 ; RV64I-NEXT: .LBB97_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 @@ -8010,10 +8010,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB97_1 +; RV64I-NEXT: bge s2, a0, .LBB97_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB97_1 ; RV64I-NEXT: .LBB97_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8026,31 +8026,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB97_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB97_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB97_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB97_1 ; 
RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8068,9 +8068,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB98_2 ; RV32I-NEXT: .LBB98_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 @@ -8087,10 +8087,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB98_1 +; RV32I-NEXT: bge s2, a0, .LBB98_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB98_1 ; RV32I-NEXT: .LBB98_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -8103,31 +8103,31 @@ ; ; RV32IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB98_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB98_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; 
RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB98_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB98_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -8141,9 +8141,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB98_2 ; RV64I-NEXT: .LBB98_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 @@ -8160,10 +8160,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB98_1 +; RV64I-NEXT: bge s2, a0, .LBB98_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB98_1 ; RV64I-NEXT: .LBB98_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8176,31 +8176,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB98_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge 
a1, a7, .LBB98_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB98_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB98_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8218,9 +8218,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB99_2 ; RV32I-NEXT: .LBB99_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 @@ -8237,10 +8237,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB99_1 +; RV32I-NEXT: bge s2, a0, .LBB99_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB99_1 ; RV32I-NEXT: .LBB99_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -8253,31 +8253,31 @@ ; ; RV32IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; 
RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB99_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB99_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB99_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB99_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -8291,9 +8291,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB99_2 ; RV64I-NEXT: .LBB99_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 @@ -8310,10 +8310,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB99_1 +; RV64I-NEXT: bge s2, a0, .LBB99_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB99_1 ; RV64I-NEXT: .LBB99_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8326,31 +8326,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; 
RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB99_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB99_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB99_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB99_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8367,18 +8367,18 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB100_2 ; RV32I-NEXT: .LBB100_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -8386,12 +8386,12 @@ ; RV32I-NEXT: bnez a0, .LBB100_4 ; RV32I-NEXT: .LBB100_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 
; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB100_1 +; RV32I-NEXT: bltu s3, a0, .LBB100_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB100_1 ; RV32I-NEXT: .LBB100_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8405,7 +8405,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8413,16 +8413,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB100_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB100_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8436,18 +8436,18 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB100_2 ; RV64I-NEXT: .LBB100_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li 
a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -8455,12 +8455,12 @@ ; RV64I-NEXT: bnez a0, .LBB100_4 ; RV64I-NEXT: .LBB100_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB100_1 +; RV64I-NEXT: bltu s3, a0, .LBB100_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB100_1 ; RV64I-NEXT: .LBB100_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8474,7 +8474,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8482,16 +8482,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB100_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB100_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8509,12 +8509,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; 
RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB101_2 ; RV32I-NEXT: .LBB101_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 @@ -8522,18 +8522,18 @@ ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB101_4 ; RV32I-NEXT: .LBB101_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB101_1 +; RV32I-NEXT: bltu s3, a0, .LBB101_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB101_1 ; RV32I-NEXT: .LBB101_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8547,7 +8547,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8555,16 +8555,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB101_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB101_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8578,12 +8578,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 
16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB101_2 ; RV64I-NEXT: .LBB101_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 @@ -8591,18 +8591,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB101_4 ; RV64I-NEXT: .LBB101_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB101_1 +; RV64I-NEXT: bltu s3, a0, .LBB101_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB101_1 ; RV64I-NEXT: .LBB101_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8616,7 +8616,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8624,16 +8624,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB101_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: 
.LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB101_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8651,31 +8651,31 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB102_2 ; RV32I-NEXT: .LBB102_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB102_4 ; RV32I-NEXT: .LBB102_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB102_1 +; RV32I-NEXT: bltu s3, a0, .LBB102_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB102_1 ; RV32I-NEXT: .LBB102_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8689,7 +8689,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8697,16 +8697,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; 
RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB102_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB102_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8720,31 +8720,31 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB102_2 ; RV64I-NEXT: .LBB102_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB102_4 ; RV64I-NEXT: .LBB102_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB102_1 +; RV64I-NEXT: bltu s3, a0, .LBB102_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB102_1 ; RV64I-NEXT: .LBB102_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8758,7 +8758,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 
; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8766,16 +8766,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB102_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB102_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8793,12 +8793,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB103_2 ; RV32I-NEXT: .LBB103_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 @@ -8806,18 +8806,18 @@ ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB103_4 ; RV32I-NEXT: .LBB103_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB103_1 +; RV32I-NEXT: bltu s3, a0, .LBB103_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 -; RV32I-NEXT: mv a2, s2 
+; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB103_1 ; RV32I-NEXT: .LBB103_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8831,7 +8831,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8839,16 +8839,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB103_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB103_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8862,12 +8862,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB103_2 ; RV64I-NEXT: .LBB103_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_2 Depth=1 @@ -8875,18 +8875,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB103_4 ; RV64I-NEXT: .LBB103_2: # %atomicrmw.start ; RV64I-NEXT: # =>This 
Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB103_1 +; RV64I-NEXT: bltu s3, a0, .LBB103_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB103_1 ; RV64I-NEXT: .LBB103_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8900,7 +8900,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8908,16 +8908,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB103_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB103_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8935,12 +8935,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB104_2 ; RV32I-NEXT: .LBB104_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 @@ -8948,18 +8948,18 @@ 
; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB104_4 ; RV32I-NEXT: .LBB104_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB104_1 +; RV32I-NEXT: bltu s3, a0, .LBB104_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB104_1 ; RV32I-NEXT: .LBB104_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8973,7 +8973,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8981,16 +8981,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB104_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB104_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB104_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9004,12 +9004,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; 
RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB104_2 ; RV64I-NEXT: .LBB104_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 @@ -9017,18 +9017,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB104_4 ; RV64I-NEXT: .LBB104_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB104_1 +; RV64I-NEXT: bltu s3, a0, .LBB104_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB104_1 ; RV64I-NEXT: .LBB104_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9042,7 +9042,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9050,16 +9050,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB104_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB104_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB104_1 ; RV64IA-NEXT: # 
%bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9077,18 +9077,18 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB105_2 ; RV32I-NEXT: .LBB105_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -9096,12 +9096,12 @@ ; RV32I-NEXT: bnez a0, .LBB105_4 ; RV32I-NEXT: .LBB105_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB105_1 +; RV32I-NEXT: bgeu s3, a0, .LBB105_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB105_1 ; RV32I-NEXT: .LBB105_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9115,7 +9115,7 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9123,16 +9123,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB105_3 ; RV32IA-NEXT: # %bb.2: # in Loop: 
Header=BB105_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB105_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9146,18 +9146,18 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB105_2 ; RV64I-NEXT: .LBB105_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -9165,12 +9165,12 @@ ; RV64I-NEXT: bnez a0, .LBB105_4 ; RV64I-NEXT: .LBB105_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB105_1 +; RV64I-NEXT: bgeu s3, a0, .LBB105_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB105_1 ; RV64I-NEXT: .LBB105_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9184,7 +9184,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9192,16 +9192,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB105_1: # 
=>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB105_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB105_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB105_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9219,12 +9219,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB106_2 ; RV32I-NEXT: .LBB106_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 @@ -9232,18 +9232,18 @@ ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB106_4 ; RV32I-NEXT: .LBB106_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB106_1 +; RV32I-NEXT: bgeu s3, a0, .LBB106_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB106_1 ; RV32I-NEXT: .LBB106_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9257,7 +9257,7 @@ ; ; RV32IA-LABEL: 
atomicrmw_umin_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9265,16 +9265,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB106_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB106_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9288,12 +9288,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB106_2 ; RV64I-NEXT: .LBB106_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 @@ -9301,18 +9301,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB106_4 ; RV64I-NEXT: .LBB106_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB106_1 +; RV64I-NEXT: 
bgeu s3, a0, .LBB106_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB106_1 ; RV64I-NEXT: .LBB106_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9326,7 +9326,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9334,16 +9334,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB106_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB106_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9361,31 +9361,31 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB107_2 ; RV32I-NEXT: .LBB107_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call 
__atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB107_4 ; RV32I-NEXT: .LBB107_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB107_1 +; RV32I-NEXT: bgeu s3, a0, .LBB107_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB107_1 ; RV32I-NEXT: .LBB107_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9399,7 +9399,7 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9407,16 +9407,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB107_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB107_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9430,31 +9430,31 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and 
s3, s0, s2 ; RV64I-NEXT: j .LBB107_2 ; RV64I-NEXT: .LBB107_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB107_4 ; RV64I-NEXT: .LBB107_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB107_1 +; RV64I-NEXT: bgeu s3, a0, .LBB107_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB107_1 ; RV64I-NEXT: .LBB107_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9468,7 +9468,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9476,16 +9476,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB107_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB107_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9503,12 +9503,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: 
sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB108_2 ; RV32I-NEXT: .LBB108_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 @@ -9516,18 +9516,18 @@ ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB108_4 ; RV32I-NEXT: .LBB108_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB108_1 +; RV32I-NEXT: bgeu s3, a0, .LBB108_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB108_1 ; RV32I-NEXT: .LBB108_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9541,7 +9541,7 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9549,16 +9549,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB108_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 
-; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB108_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9572,12 +9572,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB108_2 ; RV64I-NEXT: .LBB108_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 @@ -9585,18 +9585,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB108_4 ; RV64I-NEXT: .LBB108_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB108_1 +; RV64I-NEXT: bgeu s3, a0, .LBB108_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB108_1 ; RV64I-NEXT: .LBB108_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9610,7 +9610,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9618,16 +9618,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; 
RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB108_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB108_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9645,12 +9645,12 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB109_2 ; RV32I-NEXT: .LBB109_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 @@ -9658,18 +9658,18 @@ ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB109_4 ; RV32I-NEXT: .LBB109_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB109_1 +; RV32I-NEXT: bgeu s3, a0, .LBB109_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB109_1 ; RV32I-NEXT: .LBB109_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9683,7 +9683,7 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; 
RV32IA-NEXT: addi a3, a3, -1 @@ -9691,16 +9691,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB109_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB109_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9714,12 +9714,12 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB109_2 ; RV64I-NEXT: .LBB109_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 @@ -9727,18 +9727,18 @@ ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB109_4 ; RV64I-NEXT: .LBB109_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB109_1 +; RV64I-NEXT: bgeu s3, a0, .LBB109_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv 
a2, s0 ; RV64I-NEXT: j .LBB109_1 ; RV64I-NEXT: .LBB109_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9752,7 +9752,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9760,16 +9760,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB109_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB109_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -11099,8 +11099,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB145_2 ; RV64I-NEXT: .LBB145_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 @@ -11115,10 +11115,10 @@ ; RV64I-NEXT: .LBB145_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB145_1 +; RV64I-NEXT: blt s2, a3, .LBB145_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB145_1 ; RV64I-NEXT: .LBB145_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11188,8 +11188,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; 
RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB146_2 ; RV64I-NEXT: .LBB146_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 @@ -11204,10 +11204,10 @@ ; RV64I-NEXT: .LBB146_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB146_1 +; RV64I-NEXT: blt s2, a3, .LBB146_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB146_1 ; RV64I-NEXT: .LBB146_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11277,8 +11277,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB147_2 ; RV64I-NEXT: .LBB147_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 @@ -11293,10 +11293,10 @@ ; RV64I-NEXT: .LBB147_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB147_1 +; RV64I-NEXT: blt s2, a3, .LBB147_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB147_1 ; RV64I-NEXT: .LBB147_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11366,8 +11366,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB148_2 ; RV64I-NEXT: .LBB148_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 @@ -11382,10 +11382,10 @@ ; RV64I-NEXT: .LBB148_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; 
RV64I-NEXT: blt s1, a3, .LBB148_1 +; RV64I-NEXT: blt s2, a3, .LBB148_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB148_1 ; RV64I-NEXT: .LBB148_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11455,8 +11455,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB149_2 ; RV64I-NEXT: .LBB149_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 @@ -11471,10 +11471,10 @@ ; RV64I-NEXT: .LBB149_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB149_1 +; RV64I-NEXT: blt s2, a3, .LBB149_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB149_1 ; RV64I-NEXT: .LBB149_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11544,8 +11544,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB150_2 ; RV64I-NEXT: .LBB150_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 @@ -11560,10 +11560,10 @@ ; RV64I-NEXT: .LBB150_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB150_1 +; RV64I-NEXT: bge s2, a3, .LBB150_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB150_1 ; RV64I-NEXT: .LBB150_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11633,8 +11633,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, 
a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB151_2 ; RV64I-NEXT: .LBB151_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 @@ -11649,10 +11649,10 @@ ; RV64I-NEXT: .LBB151_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB151_1 +; RV64I-NEXT: bge s2, a3, .LBB151_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB151_1 ; RV64I-NEXT: .LBB151_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11722,8 +11722,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB152_2 ; RV64I-NEXT: .LBB152_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 @@ -11738,10 +11738,10 @@ ; RV64I-NEXT: .LBB152_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB152_1 +; RV64I-NEXT: bge s2, a3, .LBB152_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB152_1 ; RV64I-NEXT: .LBB152_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11811,8 +11811,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB153_2 ; RV64I-NEXT: .LBB153_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 @@ -11827,10 +11827,10 @@ ; RV64I-NEXT: .LBB153_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, 
a3 -; RV64I-NEXT: bge s1, a3, .LBB153_1 +; RV64I-NEXT: bge s2, a3, .LBB153_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB153_1 ; RV64I-NEXT: .LBB153_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11900,8 +11900,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB154_2 ; RV64I-NEXT: .LBB154_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 @@ -11916,10 +11916,10 @@ ; RV64I-NEXT: .LBB154_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB154_1 +; RV64I-NEXT: bge s2, a3, .LBB154_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB154_1 ; RV64I-NEXT: .LBB154_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11989,8 +11989,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB155_2 ; RV64I-NEXT: .LBB155_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 @@ -12005,10 +12005,10 @@ ; RV64I-NEXT: .LBB155_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB155_1 +; RV64I-NEXT: bltu s2, a3, .LBB155_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB155_1 ; RV64I-NEXT: .LBB155_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12078,8 +12078,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; 
RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB156_2 ; RV64I-NEXT: .LBB156_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 @@ -12094,10 +12094,10 @@ ; RV64I-NEXT: .LBB156_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB156_1 +; RV64I-NEXT: bltu s2, a3, .LBB156_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB156_1 ; RV64I-NEXT: .LBB156_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12167,8 +12167,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB157_2 ; RV64I-NEXT: .LBB157_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 @@ -12183,10 +12183,10 @@ ; RV64I-NEXT: .LBB157_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB157_1 +; RV64I-NEXT: bltu s2, a3, .LBB157_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB157_1 ; RV64I-NEXT: .LBB157_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12256,8 +12256,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB158_2 ; RV64I-NEXT: .LBB158_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 @@ -12272,10 +12272,10 @@ ; RV64I-NEXT: .LBB158_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: 
Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB158_1 +; RV64I-NEXT: bltu s2, a3, .LBB158_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB158_1 ; RV64I-NEXT: .LBB158_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12345,8 +12345,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB159_2 ; RV64I-NEXT: .LBB159_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 @@ -12361,10 +12361,10 @@ ; RV64I-NEXT: .LBB159_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB159_1 +; RV64I-NEXT: bltu s2, a3, .LBB159_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB159_1 ; RV64I-NEXT: .LBB159_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12434,8 +12434,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB160_2 ; RV64I-NEXT: .LBB160_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 @@ -12450,10 +12450,10 @@ ; RV64I-NEXT: .LBB160_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB160_1 +; RV64I-NEXT: bgeu s2, a3, .LBB160_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB160_1 ; RV64I-NEXT: .LBB160_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12523,8 +12523,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 
8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB161_2 ; RV64I-NEXT: .LBB161_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 @@ -12539,10 +12539,10 @@ ; RV64I-NEXT: .LBB161_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB161_1 +; RV64I-NEXT: bgeu s2, a3, .LBB161_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB161_1 ; RV64I-NEXT: .LBB161_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12612,8 +12612,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB162_2 ; RV64I-NEXT: .LBB162_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 @@ -12628,10 +12628,10 @@ ; RV64I-NEXT: .LBB162_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB162_1 +; RV64I-NEXT: bgeu s2, a3, .LBB162_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB162_1 ; RV64I-NEXT: .LBB162_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12701,8 +12701,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB163_2 ; RV64I-NEXT: .LBB163_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 @@ -12717,10 +12717,10 @@ ; RV64I-NEXT: .LBB163_2: # %atomicrmw.start ; RV64I-NEXT: # =>This 
Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB163_1 +; RV64I-NEXT: bgeu s2, a3, .LBB163_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB163_1 ; RV64I-NEXT: .LBB163_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12790,8 +12790,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB164_2 ; RV64I-NEXT: .LBB164_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 @@ -12806,10 +12806,10 @@ ; RV64I-NEXT: .LBB164_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB164_1 +; RV64I-NEXT: bgeu s2, a3, .LBB164_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB164_1 ; RV64I-NEXT: .LBB164_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -584,9 +584,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB10_2 ; RV32I-NEXT: .LBB10_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1 @@ -603,10 +603,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB10_1 +; RV32I-NEXT: blt s2, a0, .LBB10_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 
Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB10_1 ; RV32I-NEXT: .LBB10_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -620,30 +620,30 @@ ; ; RV32IA-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB10_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB10_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB10_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB10_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 24 @@ -659,9 +659,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB10_2 ; RV64I-NEXT: .LBB10_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 @@ -678,10 +678,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; 
RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB10_1 +; RV64I-NEXT: blt s2, a0, .LBB10_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB10_1 ; RV64I-NEXT: .LBB10_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -695,30 +695,30 @@ ; ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB10_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB10_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB10_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB10_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 56 @@ -738,9 +738,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 
+; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB11_2 ; RV32I-NEXT: .LBB11_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -757,10 +757,10 @@ ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB11_1 +; RV32I-NEXT: bge s2, a0, .LBB11_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB11_1 ; RV32I-NEXT: .LBB11_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -774,30 +774,30 @@ ; ; RV32IA-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB11_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB11_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB11_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB11_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 24 @@ -813,9 +813,9 @@ ; RV64I-NEXT: sd 
s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB11_2 ; RV64I-NEXT: .LBB11_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -832,10 +832,10 @@ ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB11_1 +; RV64I-NEXT: bge s2, a0, .LBB11_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB11_1 ; RV64I-NEXT: .LBB11_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -849,30 +849,30 @@ ; ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB11_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB11_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, 
.LBB11_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB11_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 56 @@ -892,8 +892,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB12_2 ; RV32I-NEXT: .LBB12_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 @@ -909,10 +909,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB12_1 +; RV32I-NEXT: bltu s2, a0, .LBB12_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB12_1 ; RV32I-NEXT: .LBB12_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -926,23 +926,23 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB12_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB12_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB12_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -959,8 +959,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; 
RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB12_2 ; RV64I-NEXT: .LBB12_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 @@ -976,10 +976,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB12_1 +; RV64I-NEXT: bltu s2, a0, .LBB12_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB12_1 ; RV64I-NEXT: .LBB12_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -993,23 +993,23 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB12_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB12_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB12_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -1030,8 +1030,8 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB13_2 ; RV32I-NEXT: .LBB13_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 @@ 
-1047,10 +1047,10 @@ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB13_1 +; RV32I-NEXT: bgeu s2, a0, .LBB13_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB13_1 ; RV32I-NEXT: .LBB13_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -1064,23 +1064,23 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB13_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB13_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB13_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -1097,8 +1097,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB13_2 ; RV64I-NEXT: .LBB13_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1 @@ -1114,10 +1114,10 @@ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB13_1 +; RV64I-NEXT: bgeu s2, a0, .LBB13_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in 
Loop: Header=BB13_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB13_1 ; RV64I-NEXT: .LBB13_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -1131,23 +1131,23 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB13_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB13_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB13_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -1636,9 +1636,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB21_2 ; RV32I-NEXT: .LBB21_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 @@ -1655,10 +1655,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB21_1 +; RV32I-NEXT: blt s2, a0, .LBB21_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB21_1 ; RV32I-NEXT: .LBB21_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 16 @@ -1672,31 +1672,31 @@ ; ; RV32IA-LABEL: atomicrmw_max_i16_monotonic: ; 
RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB21_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB21_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB21_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB21_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 16 @@ -1712,9 +1712,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB21_2 ; RV64I-NEXT: .LBB21_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1 @@ -1731,10 +1731,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB21_1 +; RV64I-NEXT: blt s2, a0, .LBB21_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: 
Header=BB21_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB21_1 ; RV64I-NEXT: .LBB21_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 48 @@ -1748,31 +1748,31 @@ ; ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB21_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB21_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB21_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB21_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 48 @@ -1792,9 +1792,9 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB22_2 ; RV32I-NEXT: .LBB22_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 @@ 
-1811,10 +1811,10 @@ ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB22_1 +; RV32I-NEXT: bge s2, a0, .LBB22_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB22_1 ; RV32I-NEXT: .LBB22_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 16 @@ -1828,31 +1828,31 @@ ; ; RV32IA-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB22_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB22_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB22_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB22_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 16 @@ -1868,9 +1868,9 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; 
RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB22_2 ; RV64I-NEXT: .LBB22_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 @@ -1887,10 +1887,10 @@ ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB22_1 +; RV64I-NEXT: bge s2, a0, .LBB22_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB22_1 ; RV64I-NEXT: .LBB22_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 48 @@ -1904,31 +1904,31 @@ ; ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB22_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB22_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB22_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB22_1 
; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 48 @@ -1947,18 +1947,18 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB23_2 ; RV32I-NEXT: .LBB23_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -1966,12 +1966,12 @@ ; RV32I-NEXT: bnez a0, .LBB23_4 ; RV32I-NEXT: .LBB23_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB23_1 +; RV32I-NEXT: bltu s3, a0, .LBB23_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB23_1 ; RV32I-NEXT: .LBB23_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a1, 16 @@ -1986,7 +1986,7 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -1994,16 +1994,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB23_3 +; RV32IA-NEXT: bgeu a6, a1, 
.LBB23_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB23_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -2019,18 +2019,18 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB23_2 ; RV64I-NEXT: .LBB23_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -2038,12 +2038,12 @@ ; RV64I-NEXT: bnez a0, .LBB23_4 ; RV64I-NEXT: .LBB23_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB23_1 +; RV64I-NEXT: bltu s3, a0, .LBB23_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB23_1 ; RV64I-NEXT: .LBB23_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a1, 48 @@ -2058,7 +2058,7 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -2066,16 +2066,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, 
a1, a0 ; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB23_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB23_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB23_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -2095,18 +2095,18 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB24_2 ; RV32I-NEXT: .LBB24_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -2114,12 +2114,12 @@ ; RV32I-NEXT: bnez a0, .LBB24_4 ; RV32I-NEXT: .LBB24_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB24_1 +; RV32I-NEXT: bgeu s3, a0, .LBB24_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB24_1 ; RV32I-NEXT: .LBB24_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a1, 16 @@ -2134,7 +2134,7 @@ ; ; 
RV32IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -2142,16 +2142,16 @@ ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB24_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB24_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -2167,18 +2167,18 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB24_2 ; RV64I-NEXT: .LBB24_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -2186,12 +2186,12 @@ ; RV64I-NEXT: bnez a0, .LBB24_4 ; RV64I-NEXT: .LBB24_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB24_1 +; RV64I-NEXT: bgeu 
s3, a0, .LBB24_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB24_1 ; RV64I-NEXT: .LBB24_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a1, 48 @@ -2206,7 +2206,7 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -2214,16 +2214,16 @@ ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB24_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB24_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -2546,8 +2546,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB32_2 ; RV64I-NEXT: .LBB32_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 @@ -2562,10 +2562,10 @@ ; RV64I-NEXT: .LBB32_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB32_1 +; RV64I-NEXT: blt s2, a3, .LBB32_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB32_1 ; RV64I-NEXT: .LBB32_4: # %atomicrmw.end ; 
RV64I-NEXT: mv a0, a3 @@ -2635,8 +2635,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB33_2 ; RV64I-NEXT: .LBB33_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 @@ -2651,10 +2651,10 @@ ; RV64I-NEXT: .LBB33_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB33_1 +; RV64I-NEXT: bge s2, a3, .LBB33_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB33_1 ; RV64I-NEXT: .LBB33_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2724,8 +2724,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB34_2 ; RV64I-NEXT: .LBB34_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 @@ -2740,10 +2740,10 @@ ; RV64I-NEXT: .LBB34_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB34_1 +; RV64I-NEXT: bltu s2, a3, .LBB34_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB34_1 ; RV64I-NEXT: .LBB34_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2813,8 +2813,8 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 @@ -2829,10 +2829,10 @@ ; RV64I-NEXT: .LBB35_2: # 
%atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB35_1 +; RV64I-NEXT: bgeu s2, a3, .LBB35_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -437,21 +437,21 @@ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -459,32 +459,32 @@ ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; 
RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB7_2 +; RV32I-NEXT: bnez s2, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB7_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -711,21 +711,21 @@ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -733,32 +733,32 @@ ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; 
RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB11_2 +; RV32I-NEXT: bnez s2, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB11_3 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB11_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -877,17 +877,17 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: addi s2, a2, 1365 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s3, a1, 819 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -899,12 +899,12 @@ ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3@plt ; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, 
s0 +; RV32I-NEXT: srli a0, s0, 1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: sub a0, s0, a0 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -56,26 +56,26 @@ ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) ; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw t4, 24(a5) -; RV32I-NEXT: lw t5, 28(a5) -; RV32I-NEXT: lw t6, 32(a5) -; RV32I-NEXT: lw s2, 36(a5) -; RV32I-NEXT: lw s3, 40(a5) -; RV32I-NEXT: lw s4, 44(a5) -; RV32I-NEXT: lw s5, 48(a5) -; RV32I-NEXT: lw s6, 52(a5) -; RV32I-NEXT: lw s7, 56(a5) -; RV32I-NEXT: lw s8, 60(a5) -; RV32I-NEXT: lw s9, 64(a5) -; RV32I-NEXT: lw s10, 68(a5) -; RV32I-NEXT: lw s11, 72(a5) -; RV32I-NEXT: lw ra, 76(a5) -; RV32I-NEXT: lw s1, 80(a5) -; RV32I-NEXT: lw t3, 84(a5) -; RV32I-NEXT: lw t2, 88(a5) -; RV32I-NEXT: lw t1, 92(a5) -; RV32I-NEXT: lw t0, 96(a5) -; RV32I-NEXT: lw s0, 100(a5) +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) ; RV32I-NEXT: lw a6, 104(a5) ; RV32I-NEXT: lw a4, 108(a5) ; RV32I-NEXT: lw a0, 124(a5) @@ -88,26 +88,26 @@ ; RV32I-NEXT: sw a3, 112(a5) ; RV32I-NEXT: sw a4, 108(a5) ; RV32I-NEXT: sw 
a6, 104(a5) -; RV32I-NEXT: sw s0, 100(a5) -; RV32I-NEXT: sw t0, 96(a5) -; RV32I-NEXT: sw t1, 92(a5) -; RV32I-NEXT: sw t2, 88(a5) -; RV32I-NEXT: sw t3, 84(a5) -; RV32I-NEXT: sw s1, 80(a5) -; RV32I-NEXT: sw ra, 76(a5) -; RV32I-NEXT: sw s11, 72(a5) -; RV32I-NEXT: sw s10, 68(a5) -; RV32I-NEXT: sw s9, 64(a5) -; RV32I-NEXT: sw s8, 60(a5) -; RV32I-NEXT: sw s7, 56(a5) -; RV32I-NEXT: sw s6, 52(a5) -; RV32I-NEXT: sw s5, 48(a5) -; RV32I-NEXT: sw s4, 44(a5) -; RV32I-NEXT: sw s3, 40(a5) -; RV32I-NEXT: sw s2, 36(a5) -; RV32I-NEXT: sw t6, 32(a5) -; RV32I-NEXT: sw t5, 28(a5) -; RV32I-NEXT: sw t4, 24(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 20(a5) ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload @@ -169,24 +169,24 @@ ; RV32I-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: lw a0, 24(a5) ; RV32I-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw t5, 28(a5) -; RV32I-WITH-FP-NEXT: lw t6, 32(a5) -; RV32I-WITH-FP-NEXT: lw s2, 36(a5) -; RV32I-WITH-FP-NEXT: lw s3, 40(a5) -; RV32I-WITH-FP-NEXT: lw s4, 44(a5) -; RV32I-WITH-FP-NEXT: lw s5, 48(a5) -; RV32I-WITH-FP-NEXT: lw s6, 52(a5) -; RV32I-WITH-FP-NEXT: lw s7, 56(a5) -; RV32I-WITH-FP-NEXT: lw s8, 60(a5) -; RV32I-WITH-FP-NEXT: lw s9, 64(a5) -; RV32I-WITH-FP-NEXT: lw s10, 68(a5) -; RV32I-WITH-FP-NEXT: lw s11, 72(a5) -; RV32I-WITH-FP-NEXT: lw ra, 76(a5) -; RV32I-WITH-FP-NEXT: lw t4, 
80(a5) -; RV32I-WITH-FP-NEXT: lw t3, 84(a5) -; RV32I-WITH-FP-NEXT: lw t2, 88(a5) -; RV32I-WITH-FP-NEXT: lw s1, 92(a5) -; RV32I-WITH-FP-NEXT: lw t1, 96(a5) +; RV32I-WITH-FP-NEXT: lw t1, 28(a5) +; RV32I-WITH-FP-NEXT: lw t2, 32(a5) +; RV32I-WITH-FP-NEXT: lw t3, 36(a5) +; RV32I-WITH-FP-NEXT: lw t4, 40(a5) +; RV32I-WITH-FP-NEXT: lw t5, 44(a5) +; RV32I-WITH-FP-NEXT: lw t6, 48(a5) +; RV32I-WITH-FP-NEXT: lw s1, 52(a5) +; RV32I-WITH-FP-NEXT: lw s2, 56(a5) +; RV32I-WITH-FP-NEXT: lw s3, 60(a5) +; RV32I-WITH-FP-NEXT: lw s4, 64(a5) +; RV32I-WITH-FP-NEXT: lw s5, 68(a5) +; RV32I-WITH-FP-NEXT: lw s6, 72(a5) +; RV32I-WITH-FP-NEXT: lw s7, 76(a5) +; RV32I-WITH-FP-NEXT: lw s8, 80(a5) +; RV32I-WITH-FP-NEXT: lw s9, 84(a5) +; RV32I-WITH-FP-NEXT: lw s10, 88(a5) +; RV32I-WITH-FP-NEXT: lw s11, 92(a5) +; RV32I-WITH-FP-NEXT: lw ra, 96(a5) ; RV32I-WITH-FP-NEXT: lw t0, 100(a5) ; RV32I-WITH-FP-NEXT: lw a6, 104(a5) ; RV32I-WITH-FP-NEXT: lw a4, 108(a5) @@ -201,24 +201,24 @@ ; RV32I-WITH-FP-NEXT: sw a4, 108(a5) ; RV32I-WITH-FP-NEXT: sw a6, 104(a5) ; RV32I-WITH-FP-NEXT: sw t0, 100(a5) -; RV32I-WITH-FP-NEXT: sw t1, 96(a5) -; RV32I-WITH-FP-NEXT: sw s1, 92(a5) -; RV32I-WITH-FP-NEXT: sw t2, 88(a5) -; RV32I-WITH-FP-NEXT: sw t3, 84(a5) -; RV32I-WITH-FP-NEXT: sw t4, 80(a5) -; RV32I-WITH-FP-NEXT: sw ra, 76(a5) -; RV32I-WITH-FP-NEXT: sw s11, 72(a5) -; RV32I-WITH-FP-NEXT: sw s10, 68(a5) -; RV32I-WITH-FP-NEXT: sw s9, 64(a5) -; RV32I-WITH-FP-NEXT: sw s8, 60(a5) -; RV32I-WITH-FP-NEXT: sw s7, 56(a5) -; RV32I-WITH-FP-NEXT: sw s6, 52(a5) -; RV32I-WITH-FP-NEXT: sw s5, 48(a5) -; RV32I-WITH-FP-NEXT: sw s4, 44(a5) -; RV32I-WITH-FP-NEXT: sw s3, 40(a5) -; RV32I-WITH-FP-NEXT: sw s2, 36(a5) -; RV32I-WITH-FP-NEXT: sw t6, 32(a5) -; RV32I-WITH-FP-NEXT: sw t5, 28(a5) +; RV32I-WITH-FP-NEXT: sw ra, 96(a5) +; RV32I-WITH-FP-NEXT: sw s11, 92(a5) +; RV32I-WITH-FP-NEXT: sw s10, 88(a5) +; RV32I-WITH-FP-NEXT: sw s9, 84(a5) +; RV32I-WITH-FP-NEXT: sw s8, 80(a5) +; RV32I-WITH-FP-NEXT: sw s7, 76(a5) +; RV32I-WITH-FP-NEXT: sw s6, 72(a5) +; 
RV32I-WITH-FP-NEXT: sw s5, 68(a5) +; RV32I-WITH-FP-NEXT: sw s4, 64(a5) +; RV32I-WITH-FP-NEXT: sw s3, 60(a5) +; RV32I-WITH-FP-NEXT: sw s2, 56(a5) +; RV32I-WITH-FP-NEXT: sw s1, 52(a5) +; RV32I-WITH-FP-NEXT: sw t6, 48(a5) +; RV32I-WITH-FP-NEXT: sw t5, 44(a5) +; RV32I-WITH-FP-NEXT: sw t4, 40(a5) +; RV32I-WITH-FP-NEXT: sw t3, 36(a5) +; RV32I-WITH-FP-NEXT: sw t2, 32(a5) +; RV32I-WITH-FP-NEXT: sw t1, 28(a5) ; RV32I-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: sw a0, 24(a5) ; RV32I-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload @@ -279,26 +279,26 @@ ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw t4, 24(a5) -; RV64I-NEXT: lw t5, 28(a5) -; RV64I-NEXT: lw t6, 32(a5) -; RV64I-NEXT: lw s2, 36(a5) -; RV64I-NEXT: lw s3, 40(a5) -; RV64I-NEXT: lw s4, 44(a5) -; RV64I-NEXT: lw s5, 48(a5) -; RV64I-NEXT: lw s6, 52(a5) -; RV64I-NEXT: lw s7, 56(a5) -; RV64I-NEXT: lw s8, 60(a5) -; RV64I-NEXT: lw s9, 64(a5) -; RV64I-NEXT: lw s10, 68(a5) -; RV64I-NEXT: lw s11, 72(a5) -; RV64I-NEXT: lw ra, 76(a5) -; RV64I-NEXT: lw s1, 80(a5) -; RV64I-NEXT: lw t3, 84(a5) -; RV64I-NEXT: lw t2, 88(a5) -; RV64I-NEXT: lw t1, 92(a5) -; RV64I-NEXT: lw t0, 96(a5) -; RV64I-NEXT: lw s0, 100(a5) +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) ; RV64I-NEXT: lw a6, 104(a5) ; RV64I-NEXT: lw a4, 108(a5) ; RV64I-NEXT: lw a0, 124(a5) @@ -311,26 +311,26 @@ ; RV64I-NEXT: 
sw a3, 112(a5) ; RV64I-NEXT: sw a4, 108(a5) ; RV64I-NEXT: sw a6, 104(a5) -; RV64I-NEXT: sw s0, 100(a5) -; RV64I-NEXT: sw t0, 96(a5) -; RV64I-NEXT: sw t1, 92(a5) -; RV64I-NEXT: sw t2, 88(a5) -; RV64I-NEXT: sw t3, 84(a5) -; RV64I-NEXT: sw s1, 80(a5) -; RV64I-NEXT: sw ra, 76(a5) -; RV64I-NEXT: sw s11, 72(a5) -; RV64I-NEXT: sw s10, 68(a5) -; RV64I-NEXT: sw s9, 64(a5) -; RV64I-NEXT: sw s8, 60(a5) -; RV64I-NEXT: sw s7, 56(a5) -; RV64I-NEXT: sw s6, 52(a5) -; RV64I-NEXT: sw s5, 48(a5) -; RV64I-NEXT: sw s4, 44(a5) -; RV64I-NEXT: sw s3, 40(a5) -; RV64I-NEXT: sw s2, 36(a5) -; RV64I-NEXT: sw t6, 32(a5) -; RV64I-NEXT: sw t5, 28(a5) -; RV64I-NEXT: sw t4, 24(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) +; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; RV64I-NEXT: sw t0, 24(a5) ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 20(a5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload @@ -392,24 +392,24 @@ ; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: lw a0, 24(a5) ; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw t5, 28(a5) -; RV64I-WITH-FP-NEXT: lw t6, 32(a5) -; RV64I-WITH-FP-NEXT: lw s2, 36(a5) -; RV64I-WITH-FP-NEXT: lw s3, 40(a5) -; RV64I-WITH-FP-NEXT: lw s4, 44(a5) -; RV64I-WITH-FP-NEXT: lw s5, 48(a5) -; RV64I-WITH-FP-NEXT: lw s6, 52(a5) -; RV64I-WITH-FP-NEXT: lw s7, 56(a5) -; RV64I-WITH-FP-NEXT: lw s8, 60(a5) -; RV64I-WITH-FP-NEXT: lw s9, 64(a5) -; RV64I-WITH-FP-NEXT: lw s10, 68(a5) -; RV64I-WITH-FP-NEXT: lw s11, 72(a5) -; 
RV64I-WITH-FP-NEXT: lw ra, 76(a5) -; RV64I-WITH-FP-NEXT: lw t4, 80(a5) -; RV64I-WITH-FP-NEXT: lw t3, 84(a5) -; RV64I-WITH-FP-NEXT: lw t2, 88(a5) -; RV64I-WITH-FP-NEXT: lw s1, 92(a5) -; RV64I-WITH-FP-NEXT: lw t1, 96(a5) +; RV64I-WITH-FP-NEXT: lw t1, 28(a5) +; RV64I-WITH-FP-NEXT: lw t2, 32(a5) +; RV64I-WITH-FP-NEXT: lw t3, 36(a5) +; RV64I-WITH-FP-NEXT: lw t4, 40(a5) +; RV64I-WITH-FP-NEXT: lw t5, 44(a5) +; RV64I-WITH-FP-NEXT: lw t6, 48(a5) +; RV64I-WITH-FP-NEXT: lw s1, 52(a5) +; RV64I-WITH-FP-NEXT: lw s2, 56(a5) +; RV64I-WITH-FP-NEXT: lw s3, 60(a5) +; RV64I-WITH-FP-NEXT: lw s4, 64(a5) +; RV64I-WITH-FP-NEXT: lw s5, 68(a5) +; RV64I-WITH-FP-NEXT: lw s6, 72(a5) +; RV64I-WITH-FP-NEXT: lw s7, 76(a5) +; RV64I-WITH-FP-NEXT: lw s8, 80(a5) +; RV64I-WITH-FP-NEXT: lw s9, 84(a5) +; RV64I-WITH-FP-NEXT: lw s10, 88(a5) +; RV64I-WITH-FP-NEXT: lw s11, 92(a5) +; RV64I-WITH-FP-NEXT: lw ra, 96(a5) ; RV64I-WITH-FP-NEXT: lw t0, 100(a5) ; RV64I-WITH-FP-NEXT: lw a6, 104(a5) ; RV64I-WITH-FP-NEXT: lw a4, 108(a5) @@ -424,24 +424,24 @@ ; RV64I-WITH-FP-NEXT: sw a4, 108(a5) ; RV64I-WITH-FP-NEXT: sw a6, 104(a5) ; RV64I-WITH-FP-NEXT: sw t0, 100(a5) -; RV64I-WITH-FP-NEXT: sw t1, 96(a5) -; RV64I-WITH-FP-NEXT: sw s1, 92(a5) -; RV64I-WITH-FP-NEXT: sw t2, 88(a5) -; RV64I-WITH-FP-NEXT: sw t3, 84(a5) -; RV64I-WITH-FP-NEXT: sw t4, 80(a5) -; RV64I-WITH-FP-NEXT: sw ra, 76(a5) -; RV64I-WITH-FP-NEXT: sw s11, 72(a5) -; RV64I-WITH-FP-NEXT: sw s10, 68(a5) -; RV64I-WITH-FP-NEXT: sw s9, 64(a5) -; RV64I-WITH-FP-NEXT: sw s8, 60(a5) -; RV64I-WITH-FP-NEXT: sw s7, 56(a5) -; RV64I-WITH-FP-NEXT: sw s6, 52(a5) -; RV64I-WITH-FP-NEXT: sw s5, 48(a5) -; RV64I-WITH-FP-NEXT: sw s4, 44(a5) -; RV64I-WITH-FP-NEXT: sw s3, 40(a5) -; RV64I-WITH-FP-NEXT: sw s2, 36(a5) -; RV64I-WITH-FP-NEXT: sw t6, 32(a5) -; RV64I-WITH-FP-NEXT: sw t5, 28(a5) +; RV64I-WITH-FP-NEXT: sw ra, 96(a5) +; RV64I-WITH-FP-NEXT: sw s11, 92(a5) +; RV64I-WITH-FP-NEXT: sw s10, 88(a5) +; RV64I-WITH-FP-NEXT: sw s9, 84(a5) +; RV64I-WITH-FP-NEXT: sw s8, 80(a5) +; 
RV64I-WITH-FP-NEXT: sw s7, 76(a5) +; RV64I-WITH-FP-NEXT: sw s6, 72(a5) +; RV64I-WITH-FP-NEXT: sw s5, 68(a5) +; RV64I-WITH-FP-NEXT: sw s4, 64(a5) +; RV64I-WITH-FP-NEXT: sw s3, 60(a5) +; RV64I-WITH-FP-NEXT: sw s2, 56(a5) +; RV64I-WITH-FP-NEXT: sw s1, 52(a5) +; RV64I-WITH-FP-NEXT: sw t6, 48(a5) +; RV64I-WITH-FP-NEXT: sw t5, 44(a5) +; RV64I-WITH-FP-NEXT: sw t4, 40(a5) +; RV64I-WITH-FP-NEXT: sw t3, 36(a5) +; RV64I-WITH-FP-NEXT: sw t2, 32(a5) +; RV64I-WITH-FP-NEXT: sw t1, 28(a5) ; RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: sw a0, 24(a5) ; RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload @@ -505,100 +505,100 @@ ; RV32I-NEXT: sw a0, 80(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, %lo(var+12)(s0) ; RV32I-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s1, s0, %lo(var) -; RV32I-NEXT: lw a0, 16(s1) +; RV32I-NEXT: addi s5, s0, %lo(var) +; RV32I-NEXT: lw a0, 16(s5) ; RV32I-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 20(s1) +; RV32I-NEXT: lw a0, 20(s5) ; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 24(s1) +; RV32I-NEXT: lw a0, 24(s5) ; RV32I-NEXT: sw a0, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 28(s1) +; RV32I-NEXT: lw a0, 28(s5) ; RV32I-NEXT: sw a0, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 32(s1) +; RV32I-NEXT: lw a0, 32(s5) ; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 36(s1) +; RV32I-NEXT: lw a0, 36(s5) ; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 40(s1) +; RV32I-NEXT: lw a0, 40(s5) ; RV32I-NEXT: sw a0, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 44(s1) +; RV32I-NEXT: lw a0, 44(s5) ; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 48(s1) +; RV32I-NEXT: lw a0, 48(s5) ; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 52(s1) +; RV32I-NEXT: lw a0, 52(s5) ; RV32I-NEXT: sw a0, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 56(s1) +; RV32I-NEXT: lw 
a0, 56(s5) ; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 60(s1) +; RV32I-NEXT: lw a0, 60(s5) ; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 64(s1) +; RV32I-NEXT: lw a0, 64(s5) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 68(s1) +; RV32I-NEXT: lw a0, 68(s5) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 72(s1) +; RV32I-NEXT: lw a0, 72(s5) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 76(s1) +; RV32I-NEXT: lw a0, 76(s5) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 80(s1) +; RV32I-NEXT: lw a0, 80(s5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 84(s1) +; RV32I-NEXT: lw a0, 84(s5) ; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s4, 88(s1) -; RV32I-NEXT: lw s5, 92(s1) -; RV32I-NEXT: lw s6, 96(s1) -; RV32I-NEXT: lw s7, 100(s1) -; RV32I-NEXT: lw s8, 104(s1) -; RV32I-NEXT: lw s9, 108(s1) -; RV32I-NEXT: lw s10, 112(s1) -; RV32I-NEXT: lw s11, 116(s1) -; RV32I-NEXT: lw s2, 120(s1) -; RV32I-NEXT: lw s3, 124(s1) +; RV32I-NEXT: lw s3, 88(s5) +; RV32I-NEXT: lw s4, 92(s5) +; RV32I-NEXT: lw s6, 96(s5) +; RV32I-NEXT: lw s7, 100(s5) +; RV32I-NEXT: lw s8, 104(s5) +; RV32I-NEXT: lw s9, 108(s5) +; RV32I-NEXT: lw s10, 112(s5) +; RV32I-NEXT: lw s11, 116(s5) +; RV32I-NEXT: lw s1, 120(s5) +; RV32I-NEXT: lw s2, 124(s5) ; RV32I-NEXT: call callee@plt -; RV32I-NEXT: sw s3, 124(s1) -; RV32I-NEXT: sw s2, 120(s1) -; RV32I-NEXT: sw s11, 116(s1) -; RV32I-NEXT: sw s10, 112(s1) -; RV32I-NEXT: sw s9, 108(s1) -; RV32I-NEXT: sw s8, 104(s1) -; RV32I-NEXT: sw s7, 100(s1) -; RV32I-NEXT: sw s6, 96(s1) -; RV32I-NEXT: sw s5, 92(s1) -; RV32I-NEXT: sw s4, 88(s1) +; RV32I-NEXT: sw s2, 124(s5) +; RV32I-NEXT: sw s1, 120(s5) +; RV32I-NEXT: sw s11, 116(s5) +; RV32I-NEXT: sw s10, 112(s5) +; RV32I-NEXT: sw s9, 108(s5) +; RV32I-NEXT: sw s8, 104(s5) +; RV32I-NEXT: sw s7, 100(s5) +; RV32I-NEXT: sw s6, 96(s5) +; RV32I-NEXT: 
sw s4, 92(s5) +; RV32I-NEXT: sw s3, 88(s5) ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 84(s1) +; RV32I-NEXT: sw a0, 84(s5) ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 80(s1) +; RV32I-NEXT: sw a0, 80(s5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 76(s1) +; RV32I-NEXT: sw a0, 76(s5) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 72(s1) +; RV32I-NEXT: sw a0, 72(s5) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 68(s1) +; RV32I-NEXT: sw a0, 68(s5) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 64(s1) +; RV32I-NEXT: sw a0, 64(s5) ; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 60(s1) +; RV32I-NEXT: sw a0, 60(s5) ; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 56(s1) +; RV32I-NEXT: sw a0, 56(s5) ; RV32I-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 52(s1) +; RV32I-NEXT: sw a0, 52(s5) ; RV32I-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 48(s1) +; RV32I-NEXT: sw a0, 48(s5) ; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 44(s1) +; RV32I-NEXT: sw a0, 44(s5) ; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 40(s1) +; RV32I-NEXT: sw a0, 40(s5) ; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 36(s1) +; RV32I-NEXT: sw a0, 36(s5) ; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 32(s1) +; RV32I-NEXT: sw a0, 32(s5) ; RV32I-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 28(s1) +; RV32I-NEXT: sw a0, 28(s5) ; RV32I-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 24(s1) +; RV32I-NEXT: sw a0, 24(s5) ; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 20(s1) +; RV32I-NEXT: sw a0, 20(s5) ; RV32I-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 16(s1) +; RV32I-NEXT: sw a0, 
16(s5) ; RV32I-NEXT: lw a0, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, %lo(var+12)(s0) ; RV32I-NEXT: lw a0, 80(sp) # 4-byte Folded Reload @@ -640,119 +640,119 @@ ; RV32I-WITH-FP-NEXT: sw s10, 96(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: sw s11, 92(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: addi s0, sp, 144 -; RV32I-WITH-FP-NEXT: lui s6, %hi(var) -; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: lui s1, %hi(var) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: addi s1, s6, %lo(var) -; RV32I-WITH-FP-NEXT: lw a0, 16(s1) +; RV32I-WITH-FP-NEXT: addi s6, s1, %lo(var) +; RV32I-WITH-FP-NEXT: lw a0, 16(s6) ; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 20(s1) +; RV32I-WITH-FP-NEXT: lw a0, 20(s6) ; RV32I-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 24(s1) +; RV32I-WITH-FP-NEXT: lw a0, 24(s6) ; RV32I-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 28(s1) +; RV32I-WITH-FP-NEXT: lw a0, 28(s6) ; RV32I-WITH-FP-NEXT: sw a0, -84(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 32(s1) +; RV32I-WITH-FP-NEXT: lw a0, 32(s6) ; RV32I-WITH-FP-NEXT: sw a0, -88(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 36(s1) +; RV32I-WITH-FP-NEXT: lw a0, 36(s6) ; RV32I-WITH-FP-NEXT: sw a0, -92(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 40(s1) +; RV32I-WITH-FP-NEXT: lw a0, 40(s6) ; RV32I-WITH-FP-NEXT: sw a0, -96(s0) 
# 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 44(s1) +; RV32I-WITH-FP-NEXT: lw a0, 44(s6) ; RV32I-WITH-FP-NEXT: sw a0, -100(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 48(s1) +; RV32I-WITH-FP-NEXT: lw a0, 48(s6) ; RV32I-WITH-FP-NEXT: sw a0, -104(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 52(s1) +; RV32I-WITH-FP-NEXT: lw a0, 52(s6) ; RV32I-WITH-FP-NEXT: sw a0, -108(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 56(s1) +; RV32I-WITH-FP-NEXT: lw a0, 56(s6) ; RV32I-WITH-FP-NEXT: sw a0, -112(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 60(s1) +; RV32I-WITH-FP-NEXT: lw a0, 60(s6) ; RV32I-WITH-FP-NEXT: sw a0, -116(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 64(s1) +; RV32I-WITH-FP-NEXT: lw a0, 64(s6) ; RV32I-WITH-FP-NEXT: sw a0, -120(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 68(s1) +; RV32I-WITH-FP-NEXT: lw a0, 68(s6) ; RV32I-WITH-FP-NEXT: sw a0, -124(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 72(s1) +; RV32I-WITH-FP-NEXT: lw a0, 72(s6) ; RV32I-WITH-FP-NEXT: sw a0, -128(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 76(s1) +; RV32I-WITH-FP-NEXT: lw a0, 76(s6) ; RV32I-WITH-FP-NEXT: sw a0, -132(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 80(s1) +; RV32I-WITH-FP-NEXT: lw a0, 80(s6) ; RV32I-WITH-FP-NEXT: sw a0, -136(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 84(s1) +; RV32I-WITH-FP-NEXT: lw a0, 84(s6) ; RV32I-WITH-FP-NEXT: sw a0, -140(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 88(s1) +; RV32I-WITH-FP-NEXT: lw a0, 88(s6) ; RV32I-WITH-FP-NEXT: sw a0, -144(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw s8, 92(s1) -; RV32I-WITH-FP-NEXT: lw s9, 96(s1) -; RV32I-WITH-FP-NEXT: lw s10, 100(s1) -; RV32I-WITH-FP-NEXT: lw s11, 104(s1) -; RV32I-WITH-FP-NEXT: lw s2, 108(s1) -; RV32I-WITH-FP-NEXT: lw s3, 112(s1) -; RV32I-WITH-FP-NEXT: lw s4, 116(s1) -; RV32I-WITH-FP-NEXT: lw s5, 120(s1) -; RV32I-WITH-FP-NEXT: lw s7, 124(s1) +; RV32I-WITH-FP-NEXT: lw 
s8, 92(s6) +; RV32I-WITH-FP-NEXT: lw s9, 96(s6) +; RV32I-WITH-FP-NEXT: lw s10, 100(s6) +; RV32I-WITH-FP-NEXT: lw s11, 104(s6) +; RV32I-WITH-FP-NEXT: lw s2, 108(s6) +; RV32I-WITH-FP-NEXT: lw s3, 112(s6) +; RV32I-WITH-FP-NEXT: lw s4, 116(s6) +; RV32I-WITH-FP-NEXT: lw s5, 120(s6) +; RV32I-WITH-FP-NEXT: lw s7, 124(s6) ; RV32I-WITH-FP-NEXT: call callee@plt -; RV32I-WITH-FP-NEXT: sw s7, 124(s1) -; RV32I-WITH-FP-NEXT: sw s5, 120(s1) -; RV32I-WITH-FP-NEXT: sw s4, 116(s1) -; RV32I-WITH-FP-NEXT: sw s3, 112(s1) -; RV32I-WITH-FP-NEXT: sw s2, 108(s1) -; RV32I-WITH-FP-NEXT: sw s11, 104(s1) -; RV32I-WITH-FP-NEXT: sw s10, 100(s1) -; RV32I-WITH-FP-NEXT: sw s9, 96(s1) -; RV32I-WITH-FP-NEXT: sw s8, 92(s1) +; RV32I-WITH-FP-NEXT: sw s7, 124(s6) +; RV32I-WITH-FP-NEXT: sw s5, 120(s6) +; RV32I-WITH-FP-NEXT: sw s4, 116(s6) +; RV32I-WITH-FP-NEXT: sw s3, 112(s6) +; RV32I-WITH-FP-NEXT: sw s2, 108(s6) +; RV32I-WITH-FP-NEXT: sw s11, 104(s6) +; RV32I-WITH-FP-NEXT: sw s10, 100(s6) +; RV32I-WITH-FP-NEXT: sw s9, 96(s6) +; RV32I-WITH-FP-NEXT: sw s8, 92(s6) ; RV32I-WITH-FP-NEXT: lw a0, -144(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 88(s1) +; RV32I-WITH-FP-NEXT: sw a0, 88(s6) ; RV32I-WITH-FP-NEXT: lw a0, -140(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 84(s1) +; RV32I-WITH-FP-NEXT: sw a0, 84(s6) ; RV32I-WITH-FP-NEXT: lw a0, -136(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 80(s1) +; RV32I-WITH-FP-NEXT: sw a0, 80(s6) ; RV32I-WITH-FP-NEXT: lw a0, -132(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 76(s1) +; RV32I-WITH-FP-NEXT: sw a0, 76(s6) ; RV32I-WITH-FP-NEXT: lw a0, -128(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 72(s1) +; RV32I-WITH-FP-NEXT: sw a0, 72(s6) ; RV32I-WITH-FP-NEXT: lw a0, -124(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 68(s1) +; RV32I-WITH-FP-NEXT: sw a0, 68(s6) ; RV32I-WITH-FP-NEXT: lw a0, -120(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 64(s1) +; RV32I-WITH-FP-NEXT: sw a0, 64(s6) ; 
RV32I-WITH-FP-NEXT: lw a0, -116(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 60(s1) +; RV32I-WITH-FP-NEXT: sw a0, 60(s6) ; RV32I-WITH-FP-NEXT: lw a0, -112(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 56(s1) +; RV32I-WITH-FP-NEXT: sw a0, 56(s6) ; RV32I-WITH-FP-NEXT: lw a0, -108(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 52(s1) +; RV32I-WITH-FP-NEXT: sw a0, 52(s6) ; RV32I-WITH-FP-NEXT: lw a0, -104(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 48(s1) +; RV32I-WITH-FP-NEXT: sw a0, 48(s6) ; RV32I-WITH-FP-NEXT: lw a0, -100(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 44(s1) +; RV32I-WITH-FP-NEXT: sw a0, 44(s6) ; RV32I-WITH-FP-NEXT: lw a0, -96(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 40(s1) +; RV32I-WITH-FP-NEXT: sw a0, 40(s6) ; RV32I-WITH-FP-NEXT: lw a0, -92(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 36(s1) +; RV32I-WITH-FP-NEXT: sw a0, 36(s6) ; RV32I-WITH-FP-NEXT: lw a0, -88(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 32(s1) +; RV32I-WITH-FP-NEXT: sw a0, 32(s6) ; RV32I-WITH-FP-NEXT: lw a0, -84(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 28(s1) +; RV32I-WITH-FP-NEXT: sw a0, 28(s6) ; RV32I-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 24(s1) +; RV32I-WITH-FP-NEXT: sw a0, 24(s6) ; RV32I-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 20(s1) +; RV32I-WITH-FP-NEXT: sw a0, 20(s6) ; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 16(s1) +; RV32I-WITH-FP-NEXT: sw a0, 16(s6) ; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw 
a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(s1) ; RV32I-WITH-FP-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -794,100 +794,100 @@ ; RV64I-NEXT: sd a0, 160(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, %lo(var+12)(s0) ; RV64I-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi s1, s0, %lo(var) -; RV64I-NEXT: lw a0, 16(s1) +; RV64I-NEXT: addi s5, s0, %lo(var) +; RV64I-NEXT: lw a0, 16(s5) ; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 20(s1) +; RV64I-NEXT: lw a0, 20(s5) ; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 24(s1) +; RV64I-NEXT: lw a0, 24(s5) ; RV64I-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 28(s1) +; RV64I-NEXT: lw a0, 28(s5) ; RV64I-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 32(s1) +; RV64I-NEXT: lw a0, 32(s5) ; RV64I-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 36(s1) +; RV64I-NEXT: lw a0, 36(s5) ; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 40(s1) +; RV64I-NEXT: lw a0, 40(s5) ; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 44(s1) +; RV64I-NEXT: lw a0, 44(s5) ; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 48(s1) +; RV64I-NEXT: lw a0, 48(s5) ; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 52(s1) +; RV64I-NEXT: lw a0, 52(s5) ; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 56(s1) +; RV64I-NEXT: lw a0, 56(s5) ; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 60(s1) +; RV64I-NEXT: lw a0, 60(s5) ; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 64(s1) +; RV64I-NEXT: lw a0, 64(s5) ; 
RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 68(s1) +; RV64I-NEXT: lw a0, 68(s5) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 72(s1) +; RV64I-NEXT: lw a0, 72(s5) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 76(s1) +; RV64I-NEXT: lw a0, 76(s5) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 80(s1) +; RV64I-NEXT: lw a0, 80(s5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 84(s1) +; RV64I-NEXT: lw a0, 84(s5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s4, 88(s1) -; RV64I-NEXT: lw s5, 92(s1) -; RV64I-NEXT: lw s6, 96(s1) -; RV64I-NEXT: lw s7, 100(s1) -; RV64I-NEXT: lw s8, 104(s1) -; RV64I-NEXT: lw s9, 108(s1) -; RV64I-NEXT: lw s10, 112(s1) -; RV64I-NEXT: lw s11, 116(s1) -; RV64I-NEXT: lw s2, 120(s1) -; RV64I-NEXT: lw s3, 124(s1) +; RV64I-NEXT: lw s3, 88(s5) +; RV64I-NEXT: lw s4, 92(s5) +; RV64I-NEXT: lw s6, 96(s5) +; RV64I-NEXT: lw s7, 100(s5) +; RV64I-NEXT: lw s8, 104(s5) +; RV64I-NEXT: lw s9, 108(s5) +; RV64I-NEXT: lw s10, 112(s5) +; RV64I-NEXT: lw s11, 116(s5) +; RV64I-NEXT: lw s1, 120(s5) +; RV64I-NEXT: lw s2, 124(s5) ; RV64I-NEXT: call callee@plt -; RV64I-NEXT: sw s3, 124(s1) -; RV64I-NEXT: sw s2, 120(s1) -; RV64I-NEXT: sw s11, 116(s1) -; RV64I-NEXT: sw s10, 112(s1) -; RV64I-NEXT: sw s9, 108(s1) -; RV64I-NEXT: sw s8, 104(s1) -; RV64I-NEXT: sw s7, 100(s1) -; RV64I-NEXT: sw s6, 96(s1) -; RV64I-NEXT: sw s5, 92(s1) -; RV64I-NEXT: sw s4, 88(s1) +; RV64I-NEXT: sw s2, 124(s5) +; RV64I-NEXT: sw s1, 120(s5) +; RV64I-NEXT: sw s11, 116(s5) +; RV64I-NEXT: sw s10, 112(s5) +; RV64I-NEXT: sw s9, 108(s5) +; RV64I-NEXT: sw s8, 104(s5) +; RV64I-NEXT: sw s7, 100(s5) +; RV64I-NEXT: sw s6, 96(s5) +; RV64I-NEXT: sw s4, 92(s5) +; RV64I-NEXT: sw s3, 88(s5) ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 84(s1) +; RV64I-NEXT: sw a0, 84(s5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 
80(s1) +; RV64I-NEXT: sw a0, 80(s5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 76(s1) +; RV64I-NEXT: sw a0, 76(s5) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 72(s1) +; RV64I-NEXT: sw a0, 72(s5) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 68(s1) +; RV64I-NEXT: sw a0, 68(s5) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 64(s1) +; RV64I-NEXT: sw a0, 64(s5) ; RV64I-NEXT: ld a0, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 60(s1) +; RV64I-NEXT: sw a0, 60(s5) ; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 56(s1) +; RV64I-NEXT: sw a0, 56(s5) ; RV64I-NEXT: ld a0, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 52(s1) +; RV64I-NEXT: sw a0, 52(s5) ; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 48(s1) +; RV64I-NEXT: sw a0, 48(s5) ; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 44(s1) +; RV64I-NEXT: sw a0, 44(s5) ; RV64I-NEXT: ld a0, 96(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 40(s1) +; RV64I-NEXT: sw a0, 40(s5) ; RV64I-NEXT: ld a0, 104(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 36(s1) +; RV64I-NEXT: sw a0, 36(s5) ; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 32(s1) +; RV64I-NEXT: sw a0, 32(s5) ; RV64I-NEXT: ld a0, 120(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 28(s1) +; RV64I-NEXT: sw a0, 28(s5) ; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 24(s1) +; RV64I-NEXT: sw a0, 24(s5) ; RV64I-NEXT: ld a0, 136(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 20(s1) +; RV64I-NEXT: sw a0, 20(s5) ; RV64I-NEXT: ld a0, 144(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 16(s1) +; RV64I-NEXT: sw a0, 16(s5) ; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, %lo(var+12)(s0) ; RV64I-NEXT: ld a0, 160(sp) # 8-byte Folded Reload @@ -929,119 +929,119 @@ ; RV64I-WITH-FP-NEXT: sd s10, 192(sp) # 8-byte 
Folded Spill ; RV64I-WITH-FP-NEXT: sd s11, 184(sp) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: addi s0, sp, 288 -; RV64I-WITH-FP-NEXT: lui s6, %hi(var) -; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV64I-WITH-FP-NEXT: lui s1, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: addi s1, s6, %lo(var) -; RV64I-WITH-FP-NEXT: lw a0, 16(s1) +; RV64I-WITH-FP-NEXT: addi s6, s1, %lo(var) +; RV64I-WITH-FP-NEXT: lw a0, 16(s6) ; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 20(s1) +; RV64I-WITH-FP-NEXT: lw a0, 20(s6) ; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 24(s1) +; RV64I-WITH-FP-NEXT: lw a0, 24(s6) ; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 28(s1) +; RV64I-WITH-FP-NEXT: lw a0, 28(s6) ; RV64I-WITH-FP-NEXT: sd a0, -168(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 32(s1) +; RV64I-WITH-FP-NEXT: lw a0, 32(s6) ; RV64I-WITH-FP-NEXT: sd a0, -176(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 36(s1) +; RV64I-WITH-FP-NEXT: lw a0, 36(s6) ; RV64I-WITH-FP-NEXT: sd a0, -184(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 40(s1) +; RV64I-WITH-FP-NEXT: lw a0, 40(s6) ; RV64I-WITH-FP-NEXT: sd a0, -192(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 44(s1) +; RV64I-WITH-FP-NEXT: lw a0, 44(s6) ; RV64I-WITH-FP-NEXT: sd a0, -200(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 48(s1) +; 
RV64I-WITH-FP-NEXT: lw a0, 48(s6) ; RV64I-WITH-FP-NEXT: sd a0, -208(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 52(s1) +; RV64I-WITH-FP-NEXT: lw a0, 52(s6) ; RV64I-WITH-FP-NEXT: sd a0, -216(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 56(s1) +; RV64I-WITH-FP-NEXT: lw a0, 56(s6) ; RV64I-WITH-FP-NEXT: sd a0, -224(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 60(s1) +; RV64I-WITH-FP-NEXT: lw a0, 60(s6) ; RV64I-WITH-FP-NEXT: sd a0, -232(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 64(s1) +; RV64I-WITH-FP-NEXT: lw a0, 64(s6) ; RV64I-WITH-FP-NEXT: sd a0, -240(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 68(s1) +; RV64I-WITH-FP-NEXT: lw a0, 68(s6) ; RV64I-WITH-FP-NEXT: sd a0, -248(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 72(s1) +; RV64I-WITH-FP-NEXT: lw a0, 72(s6) ; RV64I-WITH-FP-NEXT: sd a0, -256(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 76(s1) +; RV64I-WITH-FP-NEXT: lw a0, 76(s6) ; RV64I-WITH-FP-NEXT: sd a0, -264(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 80(s1) +; RV64I-WITH-FP-NEXT: lw a0, 80(s6) ; RV64I-WITH-FP-NEXT: sd a0, -272(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 84(s1) +; RV64I-WITH-FP-NEXT: lw a0, 84(s6) ; RV64I-WITH-FP-NEXT: sd a0, -280(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 88(s1) +; RV64I-WITH-FP-NEXT: lw a0, 88(s6) ; RV64I-WITH-FP-NEXT: sd a0, -288(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw s8, 92(s1) -; RV64I-WITH-FP-NEXT: lw s9, 96(s1) -; RV64I-WITH-FP-NEXT: lw s10, 100(s1) -; RV64I-WITH-FP-NEXT: lw s11, 104(s1) -; RV64I-WITH-FP-NEXT: lw s2, 108(s1) -; RV64I-WITH-FP-NEXT: lw s3, 112(s1) -; RV64I-WITH-FP-NEXT: lw s4, 116(s1) -; RV64I-WITH-FP-NEXT: lw s5, 120(s1) -; RV64I-WITH-FP-NEXT: lw s7, 124(s1) +; RV64I-WITH-FP-NEXT: lw s8, 92(s6) +; RV64I-WITH-FP-NEXT: lw s9, 96(s6) +; RV64I-WITH-FP-NEXT: lw s10, 100(s6) +; RV64I-WITH-FP-NEXT: lw s11, 104(s6) +; RV64I-WITH-FP-NEXT: lw s2, 108(s6) +; RV64I-WITH-FP-NEXT: lw s3, 
112(s6) +; RV64I-WITH-FP-NEXT: lw s4, 116(s6) +; RV64I-WITH-FP-NEXT: lw s5, 120(s6) +; RV64I-WITH-FP-NEXT: lw s7, 124(s6) ; RV64I-WITH-FP-NEXT: call callee@plt -; RV64I-WITH-FP-NEXT: sw s7, 124(s1) -; RV64I-WITH-FP-NEXT: sw s5, 120(s1) -; RV64I-WITH-FP-NEXT: sw s4, 116(s1) -; RV64I-WITH-FP-NEXT: sw s3, 112(s1) -; RV64I-WITH-FP-NEXT: sw s2, 108(s1) -; RV64I-WITH-FP-NEXT: sw s11, 104(s1) -; RV64I-WITH-FP-NEXT: sw s10, 100(s1) -; RV64I-WITH-FP-NEXT: sw s9, 96(s1) -; RV64I-WITH-FP-NEXT: sw s8, 92(s1) +; RV64I-WITH-FP-NEXT: sw s7, 124(s6) +; RV64I-WITH-FP-NEXT: sw s5, 120(s6) +; RV64I-WITH-FP-NEXT: sw s4, 116(s6) +; RV64I-WITH-FP-NEXT: sw s3, 112(s6) +; RV64I-WITH-FP-NEXT: sw s2, 108(s6) +; RV64I-WITH-FP-NEXT: sw s11, 104(s6) +; RV64I-WITH-FP-NEXT: sw s10, 100(s6) +; RV64I-WITH-FP-NEXT: sw s9, 96(s6) +; RV64I-WITH-FP-NEXT: sw s8, 92(s6) ; RV64I-WITH-FP-NEXT: ld a0, -288(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 88(s1) +; RV64I-WITH-FP-NEXT: sw a0, 88(s6) ; RV64I-WITH-FP-NEXT: ld a0, -280(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 84(s1) +; RV64I-WITH-FP-NEXT: sw a0, 84(s6) ; RV64I-WITH-FP-NEXT: ld a0, -272(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 80(s1) +; RV64I-WITH-FP-NEXT: sw a0, 80(s6) ; RV64I-WITH-FP-NEXT: ld a0, -264(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 76(s1) +; RV64I-WITH-FP-NEXT: sw a0, 76(s6) ; RV64I-WITH-FP-NEXT: ld a0, -256(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 72(s1) +; RV64I-WITH-FP-NEXT: sw a0, 72(s6) ; RV64I-WITH-FP-NEXT: ld a0, -248(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 68(s1) +; RV64I-WITH-FP-NEXT: sw a0, 68(s6) ; RV64I-WITH-FP-NEXT: ld a0, -240(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 64(s1) +; RV64I-WITH-FP-NEXT: sw a0, 64(s6) ; RV64I-WITH-FP-NEXT: ld a0, -232(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 60(s1) +; RV64I-WITH-FP-NEXT: sw a0, 60(s6) ; RV64I-WITH-FP-NEXT: ld a0, -224(s0) # 8-byte Folded Reload -; 
RV64I-WITH-FP-NEXT: sw a0, 56(s1) +; RV64I-WITH-FP-NEXT: sw a0, 56(s6) ; RV64I-WITH-FP-NEXT: ld a0, -216(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 52(s1) +; RV64I-WITH-FP-NEXT: sw a0, 52(s6) ; RV64I-WITH-FP-NEXT: ld a0, -208(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 48(s1) +; RV64I-WITH-FP-NEXT: sw a0, 48(s6) ; RV64I-WITH-FP-NEXT: ld a0, -200(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 44(s1) +; RV64I-WITH-FP-NEXT: sw a0, 44(s6) ; RV64I-WITH-FP-NEXT: ld a0, -192(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 40(s1) +; RV64I-WITH-FP-NEXT: sw a0, 40(s6) ; RV64I-WITH-FP-NEXT: ld a0, -184(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 36(s1) +; RV64I-WITH-FP-NEXT: sw a0, 36(s6) ; RV64I-WITH-FP-NEXT: ld a0, -176(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 32(s1) +; RV64I-WITH-FP-NEXT: sw a0, 32(s6) ; RV64I-WITH-FP-NEXT: ld a0, -168(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 28(s1) +; RV64I-WITH-FP-NEXT: sw a0, 28(s6) ; RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 24(s1) +; RV64I-WITH-FP-NEXT: sw a0, 24(s6) ; RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 20(s1) +; RV64I-WITH-FP-NEXT: sw a0, 20(s6) ; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 16(s1) +; RV64I-WITH-FP-NEXT: sw a0, 16(s6) ; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; 
RV64I-WITH-FP-NEXT: sw a0, %lo(var)(s1) ; RV64I-WITH-FP-NEXT: ld ra, 280(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s0, 272(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s1, 264(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -190,21 +190,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-LABEL: callee_large_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw a6, 0(a1) -; RV32I-FPELIM-NEXT: lw a7, 0(a0) +; RV32I-FPELIM-NEXT: lw a2, 0(a1) +; RV32I-FPELIM-NEXT: lw a3, 0(a0) ; RV32I-FPELIM-NEXT: lw a4, 4(a1) ; RV32I-FPELIM-NEXT: lw a5, 12(a1) -; RV32I-FPELIM-NEXT: lw a2, 12(a0) -; RV32I-FPELIM-NEXT: lw a3, 4(a0) +; RV32I-FPELIM-NEXT: lw a6, 12(a0) +; RV32I-FPELIM-NEXT: lw a7, 4(a0) ; RV32I-FPELIM-NEXT: lw a1, 8(a1) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) -; RV32I-FPELIM-NEXT: xor a2, a2, a5 -; RV32I-FPELIM-NEXT: xor a3, a3, a4 -; RV32I-FPELIM-NEXT: or a2, a3, a2 +; RV32I-FPELIM-NEXT: xor a5, a6, a5 +; RV32I-FPELIM-NEXT: xor a4, a7, a4 +; RV32I-FPELIM-NEXT: or a4, a4, a5 ; RV32I-FPELIM-NEXT: xor a0, a0, a1 -; RV32I-FPELIM-NEXT: xor a1, a7, a6 +; RV32I-FPELIM-NEXT: xor a1, a3, a2 ; RV32I-FPELIM-NEXT: or a0, a1, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a2 +; RV32I-FPELIM-NEXT: or a0, a0, a4 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -214,21 +214,21 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw a6, 0(a1) -; RV32I-WITHFP-NEXT: lw a7, 0(a0) +; RV32I-WITHFP-NEXT: lw a2, 0(a1) +; RV32I-WITHFP-NEXT: lw a3, 0(a0) ; RV32I-WITHFP-NEXT: lw a4, 4(a1) ; RV32I-WITHFP-NEXT: lw a5, 12(a1) -; RV32I-WITHFP-NEXT: lw a2, 
12(a0) -; RV32I-WITHFP-NEXT: lw a3, 4(a0) +; RV32I-WITHFP-NEXT: lw a6, 12(a0) +; RV32I-WITHFP-NEXT: lw a7, 4(a0) ; RV32I-WITHFP-NEXT: lw a1, 8(a1) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) -; RV32I-WITHFP-NEXT: xor a2, a2, a5 -; RV32I-WITHFP-NEXT: xor a3, a3, a4 -; RV32I-WITHFP-NEXT: or a2, a3, a2 +; RV32I-WITHFP-NEXT: xor a5, a6, a5 +; RV32I-WITHFP-NEXT: xor a4, a7, a4 +; RV32I-WITHFP-NEXT: or a4, a4, a5 ; RV32I-WITHFP-NEXT: xor a0, a0, a1 -; RV32I-WITHFP-NEXT: xor a1, a7, a6 +; RV32I-WITHFP-NEXT: xor a1, a3, a2 ; RV32I-WITHFP-NEXT: or a0, a1, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a2 +; RV32I-WITHFP-NEXT: or a0, a0, a4 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -297,21 +297,21 @@ ; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 4(sp) -; RV32I-FPELIM-NEXT: lw a6, 0(a0) -; RV32I-FPELIM-NEXT: lw t0, 0(a7) +; RV32I-FPELIM-NEXT: lw a1, 0(a0) +; RV32I-FPELIM-NEXT: lw a2, 0(a7) ; RV32I-FPELIM-NEXT: lw a3, 4(a0) ; RV32I-FPELIM-NEXT: lw a4, 12(a0) ; RV32I-FPELIM-NEXT: lw a5, 12(a7) -; RV32I-FPELIM-NEXT: lw a1, 4(a7) +; RV32I-FPELIM-NEXT: lw a6, 4(a7) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) -; RV32I-FPELIM-NEXT: lw a2, 8(a7) +; RV32I-FPELIM-NEXT: lw a7, 8(a7) ; RV32I-FPELIM-NEXT: xor a4, a5, a4 -; RV32I-FPELIM-NEXT: xor a1, a1, a3 -; RV32I-FPELIM-NEXT: or a1, a1, a4 -; RV32I-FPELIM-NEXT: xor a0, a2, a0 -; RV32I-FPELIM-NEXT: xor a2, t0, a6 -; RV32I-FPELIM-NEXT: or a0, a2, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a1 +; RV32I-FPELIM-NEXT: xor a3, a6, a3 +; RV32I-FPELIM-NEXT: or a3, a3, a4 +; RV32I-FPELIM-NEXT: xor a0, a7, a0 +; RV32I-FPELIM-NEXT: xor a1, a2, a1 +; RV32I-FPELIM-NEXT: or a0, a1, a0 +; RV32I-FPELIM-NEXT: or a0, a0, a3 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -322,21 +322,21 @@ ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw 
a0, 4(s0) -; RV32I-WITHFP-NEXT: lw a6, 0(a0) -; RV32I-WITHFP-NEXT: lw t0, 0(a7) +; RV32I-WITHFP-NEXT: lw a1, 0(a0) +; RV32I-WITHFP-NEXT: lw a2, 0(a7) ; RV32I-WITHFP-NEXT: lw a3, 4(a0) ; RV32I-WITHFP-NEXT: lw a4, 12(a0) ; RV32I-WITHFP-NEXT: lw a5, 12(a7) -; RV32I-WITHFP-NEXT: lw a1, 4(a7) +; RV32I-WITHFP-NEXT: lw a6, 4(a7) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) -; RV32I-WITHFP-NEXT: lw a2, 8(a7) +; RV32I-WITHFP-NEXT: lw a7, 8(a7) ; RV32I-WITHFP-NEXT: xor a4, a5, a4 -; RV32I-WITHFP-NEXT: xor a1, a1, a3 -; RV32I-WITHFP-NEXT: or a1, a1, a4 -; RV32I-WITHFP-NEXT: xor a0, a2, a0 -; RV32I-WITHFP-NEXT: xor a2, t0, a6 -; RV32I-WITHFP-NEXT: or a0, a2, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a1 +; RV32I-WITHFP-NEXT: xor a3, a6, a3 +; RV32I-WITHFP-NEXT: or a3, a3, a4 +; RV32I-WITHFP-NEXT: xor a0, a7, a0 +; RV32I-WITHFP-NEXT: xor a1, a2, a1 +; RV32I-WITHFP-NEXT: or a0, a1, a0 +; RV32I-WITHFP-NEXT: or a0, a0, a3 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -106,21 +106,21 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-LABEL: callee_large_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a6, 0(a1) -; RV64I-NEXT: ld a7, 0(a0) +; RV64I-NEXT: ld a2, 0(a1) +; RV64I-NEXT: ld a3, 0(a0) ; RV64I-NEXT: ld a4, 8(a1) ; RV64I-NEXT: ld a5, 24(a1) -; RV64I-NEXT: ld a2, 24(a0) -; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: ld a7, 8(a0) ; RV64I-NEXT: ld a1, 16(a1) ; RV64I-NEXT: ld a0, 16(a0) -; RV64I-NEXT: xor a2, a2, a5 -; RV64I-NEXT: xor a3, a3, a4 -; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: xor a5, a6, a5 +; RV64I-NEXT: xor a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; 
RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: xor a1, a7, a6 +; RV64I-NEXT: xor a1, a3, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %a, %b @@ -161,21 +161,21 @@ ; RV64I-LABEL: callee_large_scalars_exhausted_regs: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 8(sp) -; RV64I-NEXT: ld a6, 0(a0) -; RV64I-NEXT: ld t0, 0(a7) +; RV64I-NEXT: ld a1, 0(a0) +; RV64I-NEXT: ld a2, 0(a7) ; RV64I-NEXT: ld a3, 8(a0) ; RV64I-NEXT: ld a4, 24(a0) ; RV64I-NEXT: ld a5, 24(a7) -; RV64I-NEXT: ld a1, 8(a7) +; RV64I-NEXT: ld a6, 8(a7) ; RV64I-NEXT: ld a0, 16(a0) -; RV64I-NEXT: ld a2, 16(a7) +; RV64I-NEXT: ld a7, 16(a7) ; RV64I-NEXT: xor a4, a5, a4 -; RV64I-NEXT: xor a1, a1, a3 -; RV64I-NEXT: or a1, a1, a4 -; RV64I-NEXT: xor a0, a2, a0 -; RV64I-NEXT: xor a2, t0, a6 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: xor a3, a6, a3 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: xor a0, a7, a0 +; RV64I-NEXT: xor a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %h, %j diff --git a/llvm/test/CodeGen/RISCV/double-arith-strict.ll b/llvm/test/CodeGen/RISCV/double-arith-strict.ll --- a/llvm/test/CodeGen/RISCV/double-arith-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-arith-strict.ll @@ -313,10 +313,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ -325,10 +325,10 @@ ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; 
RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -393,25 +393,25 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: mv s5, a4 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a3 ; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a2, s1, a0 +; RV32I-NEXT: xor a2, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: mv a3, s2 @@ -434,19 +434,19 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -489,7 +489,7 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw 
s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 +; RV32I-NEXT: mv s0, a5 ; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 @@ -498,20 +498,20 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a3, s0, a0 +; RV32I-NEXT: xor a3, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -531,19 +531,19 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -582,19 +582,19 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; 
RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -654,10 +654,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a2, 0 @@ -666,10 +666,10 @@ ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a3, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -479,10 +479,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ 
-491,10 +491,10 @@ ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -559,25 +559,25 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: mv s5, a4 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a3 ; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a2, s1, a0 +; RV32I-NEXT: xor a2, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: mv a3, s2 @@ -600,19 +600,19 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fma@plt ; 
RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -655,7 +655,7 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 +; RV32I-NEXT: mv s0, a5 ; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 @@ -664,20 +664,20 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a3, s0, a0 +; RV32I-NEXT: xor a3, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -697,19 +697,19 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -748,19 +748,19 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte 
Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -820,10 +820,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a2, 0 @@ -832,10 +832,10 @@ ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a3, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -945,10 +945,10 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ 
-956,10 +956,10 @@ ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: mv a3, s5 @@ -981,16 +981,16 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __muldf3@plt +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1033,22 +1033,15 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 @@ -1056,15 +1049,22 @@ ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: li a3, 
0 +; RV32I-NEXT: call __adddf3@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __subdf3@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1078,26 +1078,25 @@ ; ; RV64I-LABEL: fnmadd_d_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: li a1, -1 @@ -1105,12 +1104,11 @@ ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subdf3@plt -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte 
Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor @@ -1148,29 +1146,29 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s4, a3 -; RV32I-NEXT: mv s5, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subdf3@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1189,19 +1187,19 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; 
RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -137,15 +137,15 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 794112 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: lui s5, 524288 ; RV32I-NEXT: lui s4, 524288 @@ -156,17 +156,17 @@ ; RV32I-NEXT: lui a0, 269824 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1047552 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: bge s0, a0, .LBB3_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: addi s4, s5, -1 ; RV32I-NEXT: .LBB3_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB3_6 ; RV32I-NEXT: # %bb.5: # %start @@ -363,20 +363,20 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: 
mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 270080 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1048064 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -385,7 +385,7 @@ ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB6_2: # %start ; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: bgtz s2, .LBB6_4 +; RV32I-NEXT: bgtz s0, .LBB6_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB6_4: # %start @@ -694,7 +694,7 @@ ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278016 ; RV32I-NEXT: addi s3, a0, -1 @@ -704,67 +704,67 @@ ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: lui a3, 802304 -; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfdi@plt ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: blt s6, s0, .LBB12_2 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: blt s6, s2, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB12_2: # %start ; RV32I-NEXT: li s6, -1 -; RV32I-NEXT: blt s0, s4, .LBB12_4 +; RV32I-NEXT: blt s2, s4, .LBB12_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv s6, a1 ; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 
+; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: bne a0, s0, .LBB12_6 +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: bne a0, s2, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start ; RV32I-NEXT: mv s4, s6 ; RV32I-NEXT: .LBB12_6: # %start ; RV32I-NEXT: lui a3, 802304 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: lui s7, 524288 ; RV32I-NEXT: lui s6, 524288 -; RV32I-NEXT: blt a0, s0, .LBB12_8 +; RV32I-NEXT: blt a0, s2, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start ; RV32I-NEXT: mv s6, s5 ; RV32I-NEXT: .LBB12_8: # %start ; RV32I-NEXT: li a2, -1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: bge s0, a0, .LBB12_10 +; RV32I-NEXT: bge s2, a0, .LBB12_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: addi s6, s7, -1 ; RV32I-NEXT: .LBB12_10: # %start ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s0, .LBB12_12 +; RV32I-NEXT: bne a0, s2, .LBB12_12 ; RV32I-NEXT: # %bb.11: # %start -; RV32I-NEXT: mv s0, s6 +; RV32I-NEXT: mv s2, s6 ; RV32I-NEXT: .LBB12_12: # %start ; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -936,22 +936,22 @@ ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 278272 ; RV32I-NEXT: addi s3, a0, -1 ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: li s2, -1 -; RV32I-NEXT: mv a0, s0 +; 
RV32I-NEXT: li s0, -1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfdi@plt ; RV32I-NEXT: mv s5, a1 @@ -966,12 +966,12 @@ ; RV32I-NEXT: mv s4, a1 ; RV32I-NEXT: .LBB14_4: # %start ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 @@ -983,10 +983,10 @@ ; RV32I-NEXT: .LBB14_6: # %start ; RV32I-NEXT: bgtz s3, .LBB14_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: .LBB14_8: # %start ; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1515,15 +1515,15 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 790016 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: lui s4, 1048568 ; RV32I-NEXT: blt s3, s0, .LBB26_2 @@ -1532,8 +1532,8 @@ ; RV32I-NEXT: .LBB26_2: # %start ; RV32I-NEXT: lui a0, 265728 ; RV32I-NEXT: addi a3, a0, -64 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv 
a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: bge s0, a0, .LBB26_4 @@ -1541,10 +1541,10 @@ ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: .LBB26_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB26_6 ; RV32I-NEXT: # %bb.5: # %start @@ -1574,12 +1574,12 @@ ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB26_2 +; RV64I-NEXT: bltz s1, .LBB26_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB26_2: # %start @@ -1588,7 +1588,7 @@ ; RV64I-NEXT: slli a1, a0, 38 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: bge s1, a0, .LBB26_4 +; RV64I-NEXT: bge s2, a0, .LBB26_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -1596,11 +1596,11 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB26_6 +; RV64I-NEXT: bne a0, s2, .LBB26_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; RV64I-NEXT: .LBB26_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1677,20 +1677,20 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 265984 ; RV32I-NEXT: addi 
a3, a0, -32 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -1701,7 +1701,7 @@ ; RV32I-NEXT: lui a0, 16 ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgtz s2, .LBB28_4 +; RV32I-NEXT: bgtz s0, .LBB28_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: .LBB28_4: # %start @@ -1833,15 +1833,15 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 787968 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: li s4, -128 ; RV32I-NEXT: blt s3, s0, .LBB30_2 @@ -1849,8 +1849,8 @@ ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: .LBB30_2: # %start ; RV32I-NEXT: lui a3, 263676 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: li s3, 127 @@ -1858,10 +1858,10 @@ ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv s3, s4 ; RV32I-NEXT: .LBB30_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB30_6 ; 
RV32I-NEXT: # %bb.5: # %start @@ -1891,12 +1891,12 @@ ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB30_2 +; RV64I-NEXT: bltz s1, .LBB30_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB30_2: # %start @@ -1904,19 +1904,19 @@ ; RV64I-NEXT: slli a1, a0, 34 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB30_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB30_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB30_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB30_6 +; RV64I-NEXT: bne a0, s2, .LBB30_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; RV64I-NEXT: .LBB30_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1997,18 +1997,18 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 263934 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -2017,7 +2017,7 @@ ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB32_2: # 
%start ; RV32I-NEXT: li a0, 255 -; RV32I-NEXT: bgtz s2, .LBB32_4 +; RV32I-NEXT: bgtz s0, .LBB32_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB32_4: # %start diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll --- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll @@ -250,16 +250,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -378,16 +378,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 @@ -885,16 +885,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded 
Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -999,16 +999,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll @@ -234,16 +234,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv 
a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -348,16 +348,16 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 diff --git a/llvm/test/CodeGen/RISCV/float-arith-strict.ll b/llvm/test/CodeGen/RISCV/float-arith-strict.ll --- a/llvm/test/CodeGen/RISCV/float-arith-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-arith-strict.ll @@ -379,18 +379,18 @@ ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -407,18 +407,18 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; 
RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -459,18 +459,18 @@ ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -487,18 +487,18 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll --- 
a/llvm/test/CodeGen/RISCV/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/float-arith.ll @@ -557,18 +557,18 @@ ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -585,18 +585,18 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -637,18 +637,18 @@ ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, 
a0, a2 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -665,18 +665,18 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -883,16 +883,16 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsf3@plt +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -908,16 +908,16 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call 
__mulsf3@plt ; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __mulsf3@plt +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -952,74 +952,70 @@ ; ; RV32I-LABEL: fnmadd_s_contract: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subsf3@plt -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: 
ret ; ; RV64I-LABEL: fnmadd_s_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subsf3@plt -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd float 0.0, %a ; avoid negation using xor %b_ = fadd float 0.0, %b ; avoid negation using xor @@ -1054,19 +1050,19 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw 
s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1082,19 +1078,19 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -903,25 +903,25 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: bltz s1, .LBB14_2 +; RV32I-NEXT: bltz s2, .LBB14_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: .LBB14_2: # %start ; RV32I-NEXT: lui a0, 
391168 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: li s2, -1 ; RV32I-NEXT: li s3, -1 -; RV32I-NEXT: li s4, -1 ; RV32I-NEXT: bgtz a0, .LBB14_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: mv s3, s5 ; RV32I-NEXT: .LBB14_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 @@ -929,17 +929,17 @@ ; RV32I-NEXT: li s5, 0 ; RV32I-NEXT: bltz a0, .LBB14_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s2 +; RV32I-NEXT: mv s5, s1 ; RV32I-NEXT: .LBB14_6: # %start ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB14_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s3, s5 +; RV32I-NEXT: mv s2, s5 ; RV32I-NEXT: .LBB14_8: # %start -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1382,12 +1382,12 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: lui s3, 1048568 -; RV32I-NEXT: bltz s2, .LBB24_2 +; RV32I-NEXT: bltz s1, .LBB24_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB24_2: # %start @@ -1395,7 +1395,7 @@ ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB24_4 +; RV32I-NEXT: bge s2, a0, .LBB24_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s3, a0, -1 @@ -1403,11 +1403,11 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB24_6 +; 
RV32I-NEXT: bne a0, s2, .LBB24_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB24_6: # %start -; RV32I-NEXT: slli a0, s1, 16 +; RV32I-NEXT: slli a0, s2, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1428,12 +1428,12 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB24_2 +; RV64I-NEXT: bltz s1, .LBB24_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB24_2: # %start @@ -1441,7 +1441,7 @@ ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB24_4 +; RV64I-NEXT: bge s2, a0, .LBB24_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -1449,11 +1449,11 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB24_6 +; RV64I-NEXT: bne a0, s2, .LBB24_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; RV64I-NEXT: .LBB24_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1678,31 +1678,31 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 798720 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li s3, -128 -; RV32I-NEXT: bltz s2, .LBB28_2 +; RV32I-NEXT: bltz s1, .LBB28_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB28_2: # %start ; RV32I-NEXT: lui 
a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s2, 127 -; RV32I-NEXT: blt s1, a0, .LBB28_4 +; RV32I-NEXT: li s1, 127 +; RV32I-NEXT: blt s2, a0, .LBB28_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s1, s3 ; RV32I-NEXT: .LBB28_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB28_6 +; RV32I-NEXT: bne a0, s2, .LBB28_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s2 +; RV32I-NEXT: mv s2, s1 ; RV32I-NEXT: .LBB28_6: # %start -; RV32I-NEXT: slli a0, s1, 24 +; RV32I-NEXT: slli a0, s2, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1723,31 +1723,31 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 798720 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB28_2 +; RV64I-NEXT: bltz s1, .LBB28_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB28_2: # %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB28_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB28_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB28_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB28_6 +; RV64I-NEXT: bne a0, s2, .LBB28_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; RV64I-NEXT: .LBB28_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload 
diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -14,25 +14,25 @@ ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a6, %lo(x)(a0) -; RV32I-NEXT: lw a7, %lo(x+4)(a0) +; RV32I-NEXT: lw a2, %lo(x)(a0) +; RV32I-NEXT: lw a1, %lo(x+4)(a0) ; RV32I-NEXT: lw a3, %lo(x+8)(a0) ; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: lui a4, %hi(y) ; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a2, %lo(y+4)(a4) -; RV32I-NEXT: lw a1, %lo(y+8)(a4) +; RV32I-NEXT: lw a6, %lo(y+4)(a4) +; RV32I-NEXT: lw a7, %lo(y+8)(a4) ; RV32I-NEXT: lw a4, %lo(y+12)(a4) ; RV32I-NEXT: sw a4, 20(sp) -; RV32I-NEXT: sw a1, 16(sp) -; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a0, 36(sp) ; RV32I-NEXT: sw a3, 32(sp) -; RV32I-NEXT: sw a7, 28(sp) +; RV32I-NEXT: sw a1, 28(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: sw a6, 24(sp) +; RV32I-NEXT: sw a2, 24(sp) ; RV32I-NEXT: call __netf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -51,26 +51,26 @@ ; RV32I-NEXT: addi sp, sp, -80 ; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a6, %lo(x)(a0) -; RV32I-NEXT: lw a7, %lo(x+4)(a0) +; RV32I-NEXT: lw a3, %lo(x)(a0) +; RV32I-NEXT: lw a1, %lo(x+4)(a0) ; RV32I-NEXT: lw a2, %lo(x+8)(a0) ; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: lui a4, %hi(y) ; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a3, %lo(y+4)(a4) -; RV32I-NEXT: lw a1, %lo(y+8)(a4) +; RV32I-NEXT: lw a6, %lo(y+4)(a4) +; RV32I-NEXT: lw a7, %lo(y+8)(a4) ; RV32I-NEXT: lw a4, %lo(y+12)(a4) ; RV32I-NEXT: sw a4, 36(sp) -; RV32I-NEXT: sw a1, 32(sp) -; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: sw a5, 24(sp) ; RV32I-NEXT: sw 
a0, 52(sp) ; RV32I-NEXT: sw a2, 48(sp) -; RV32I-NEXT: sw a7, 44(sp) +; RV32I-NEXT: sw a1, 44(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 -; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a3, 40(sp) ; RV32I-NEXT: call __addtf3@plt ; RV32I-NEXT: lw a1, 56(sp) ; RV32I-NEXT: lw a0, 60(sp) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1159,43 +1159,43 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixdfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, -1 ; RV32-NEXT: beq a1, a5, .LBB18_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB18_3 +; RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB18_3 ; RV32-NEXT: j .LBB18_4 ; RV32-NEXT: .LBB18_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB18_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB18_4 ; RV32-NEXT: .LBB18_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB18_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB18_7 +; RV32-NEXT: beqz a7, .LBB18_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB18_8 +; RV32-NEXT: beq a1, a4, .LBB18_8 ; RV32-NEXT: .LBB18_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB18_9 ; RV32-NEXT: j .LBB18_10 ; RV32-NEXT: .LBB18_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 
; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB18_6 +; RV32-NEXT: bne a1, a4, .LBB18_6 ; RV32-NEXT: .LBB18_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB18_10 ; RV32-NEXT: .LBB18_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -1441,43 +1441,43 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, -1 ; RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB21_3 +; RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB21_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB21_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB21_7 +; RV32-NEXT: beqz a7, .LBB21_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB21_8 +; RV32-NEXT: beq a1, a4, .LBB21_8 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB21_9 ; RV32-NEXT: j .LBB21_10 ; RV32-NEXT: .LBB21_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB21_6 +; RV32-NEXT: bne a1, a4, .LBB21_6 ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; 
RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB21_10 ; RV32-NEXT: .LBB21_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -1685,43 +1685,43 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, -1 ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB24_3 +; RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB24_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB24_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB24_7 +; RV32-NEXT: beqz a7, .LBB24_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB24_8 +; RV32-NEXT: beq a1, a4, .LBB24_8 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB24_9 ; RV32-NEXT: j .LBB24_10 ; RV32-NEXT: .LBB24_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB24_6 +; RV32-NEXT: bne a1, a4, .LBB24_6 ; RV32-NEXT: .LBB24_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB24_10 ; RV32-NEXT: .LBB24_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -3090,109 +3090,109 @@ ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; 
RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB45_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB45_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB45_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB45_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: bne a1, a2, .LBB45_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB45_20 ; RV32-NEXT: .LBB45_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB45_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB45_21 ; RV32-NEXT: .LBB45_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, .LBB45_22 ; RV32-NEXT: .LBB45_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB45_23 +; RV32-NEXT: bgeu a1, a6, .LBB45_23 ; RV32-NEXT: .LBB45_7: # %entry -; RV32-NEXT: bnez a0, .LBB45_24 +; RV32-NEXT: bnez t0, .LBB45_24 ; RV32-NEXT: .LBB45_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: bnez a3, .LBB45_25 ; RV32-NEXT: .LBB45_9: # %entry ; RV32-NEXT: bgez a3, .LBB45_26 ; RV32-NEXT: .LBB45_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB45_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB45_27 ; RV32-NEXT: .LBB45_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB45_28 +; RV32-NEXT: bne a1, a4, .LBB45_28 ; RV32-NEXT: .LBB45_12: # %entry ; RV32-NEXT: bltz a3, .LBB45_29 ; RV32-NEXT: .LBB45_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, .LBB45_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB45_30 ; RV32-NEXT: .LBB45_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB45_31 ; RV32-NEXT: .LBB45_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB45_32 
+; RV32-NEXT: bgeu a4, a1, .LBB45_32 ; RV32-NEXT: .LBB45_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB45_18 +; RV32-NEXT: beq a6, a2, .LBB45_18 ; RV32-NEXT: .LBB45_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB45_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB45_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB45_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB45_4 ; RV32-NEXT: .LBB45_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB45_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB45_5 ; RV32-NEXT: .LBB45_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, .LBB45_6 ; RV32-NEXT: .LBB45_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB45_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB45_7 ; RV32-NEXT: .LBB45_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB45_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, .LBB45_8 ; RV32-NEXT: .LBB45_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB45_9 ; RV32-NEXT: .LBB45_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB45_10 ; RV32-NEXT: .LBB45_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB45_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB45_11 ; RV32-NEXT: .LBB45_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB45_12 +; RV32-NEXT: beq a1, a4, .LBB45_12 ; RV32-NEXT: .LBB45_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; 
RV32-NEXT: bgez a3, .LBB45_13 ; RV32-NEXT: .LBB45_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB45_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB45_14 ; RV32-NEXT: .LBB45_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB45_15 ; RV32-NEXT: .LBB45_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu a7, a1, .LBB45_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB45_16 ; RV32-NEXT: .LBB45_32: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a2, a6, .LBB45_17 +; RV32-NEXT: bne a6, a2, .LBB45_17 ; RV32-NEXT: j .LBB45_18 ; ; RV64IF-LABEL: stest_f64i64_mm: @@ -3514,109 +3514,109 @@ ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB48_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB48_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: bne a1, a2, .LBB48_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB48_20 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB48_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB48_21 ; RV32-NEXT: .LBB48_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, .LBB48_22 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB48_23 +; RV32-NEXT: bgeu a1, a6, .LBB48_23 ; RV32-NEXT: .LBB48_7: # %entry -; RV32-NEXT: bnez a0, .LBB48_24 +; RV32-NEXT: bnez t0, .LBB48_24 ; RV32-NEXT: .LBB48_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 
; RV32-NEXT: bnez a3, .LBB48_25 ; RV32-NEXT: .LBB48_9: # %entry ; RV32-NEXT: bgez a3, .LBB48_26 ; RV32-NEXT: .LBB48_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB48_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB48_27 ; RV32-NEXT: .LBB48_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB48_28 +; RV32-NEXT: bne a1, a4, .LBB48_28 ; RV32-NEXT: .LBB48_12: # %entry ; RV32-NEXT: bltz a3, .LBB48_29 ; RV32-NEXT: .LBB48_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, .LBB48_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB48_30 ; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB48_31 ; RV32-NEXT: .LBB48_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB48_32 +; RV32-NEXT: bgeu a4, a1, .LBB48_32 ; RV32-NEXT: .LBB48_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB48_18 +; RV32-NEXT: beq a6, a2, .LBB48_18 ; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB48_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB48_4 ; RV32-NEXT: .LBB48_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB48_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB48_5 ; RV32-NEXT: .LBB48_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, .LBB48_6 ; RV32-NEXT: .LBB48_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB48_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB48_7 ; RV32-NEXT: .LBB48_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB48_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, 
.LBB48_8 ; RV32-NEXT: .LBB48_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB48_9 ; RV32-NEXT: .LBB48_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB48_10 ; RV32-NEXT: .LBB48_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB48_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB48_11 ; RV32-NEXT: .LBB48_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB48_12 +; RV32-NEXT: beq a1, a4, .LBB48_12 ; RV32-NEXT: .LBB48_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; RV32-NEXT: bgez a3, .LBB48_13 ; RV32-NEXT: .LBB48_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB48_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB48_14 ; RV32-NEXT: .LBB48_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB48_15 ; RV32-NEXT: .LBB48_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu a7, a1, .LBB48_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB48_16 ; RV32-NEXT: .LBB48_32: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a2, a6, .LBB48_17 +; RV32-NEXT: bne a6, a2, .LBB48_17 ; RV32-NEXT: j .LBB48_18 ; ; RV64-LABEL: stest_f32i64_mm: @@ -3886,109 +3886,109 @@ ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB51_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: 
addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB51_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: bne a1, a2, .LBB51_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB51_20 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB51_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB51_21 ; RV32-NEXT: .LBB51_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, .LBB51_22 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB51_23 +; RV32-NEXT: bgeu a1, a6, .LBB51_23 ; RV32-NEXT: .LBB51_7: # %entry -; RV32-NEXT: bnez a0, .LBB51_24 +; RV32-NEXT: bnez t0, .LBB51_24 ; RV32-NEXT: .LBB51_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: bnez a3, .LBB51_25 ; RV32-NEXT: .LBB51_9: # %entry ; RV32-NEXT: bgez a3, .LBB51_26 ; RV32-NEXT: .LBB51_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB51_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB51_27 ; RV32-NEXT: .LBB51_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB51_28 +; RV32-NEXT: bne a1, a4, .LBB51_28 ; RV32-NEXT: .LBB51_12: # %entry ; RV32-NEXT: bltz a3, .LBB51_29 ; RV32-NEXT: .LBB51_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, .LBB51_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB51_30 ; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB51_31 ; RV32-NEXT: .LBB51_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB51_32 +; RV32-NEXT: bgeu a4, a1, .LBB51_32 ; RV32-NEXT: .LBB51_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB51_18 +; RV32-NEXT: beq a6, a2, .LBB51_18 ; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw 
t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB51_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB51_4 ; RV32-NEXT: .LBB51_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB51_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB51_5 ; RV32-NEXT: .LBB51_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, .LBB51_6 ; RV32-NEXT: .LBB51_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB51_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB51_7 ; RV32-NEXT: .LBB51_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB51_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, .LBB51_8 ; RV32-NEXT: .LBB51_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB51_9 ; RV32-NEXT: .LBB51_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB51_10 ; RV32-NEXT: .LBB51_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB51_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB51_11 ; RV32-NEXT: .LBB51_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB51_12 +; RV32-NEXT: beq a1, a4, .LBB51_12 ; RV32-NEXT: .LBB51_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; RV32-NEXT: bgez a3, .LBB51_13 ; RV32-NEXT: .LBB51_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB51_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB51_14 ; RV32-NEXT: .LBB51_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB51_15 ; RV32-NEXT: .LBB51_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu 
a7, a1, .LBB51_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB51_16 ; RV32-NEXT: .LBB51_32: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a2, a6, .LBB51_17 +; RV32-NEXT: bne a6, a2, .LBB51_17 ; RV32-NEXT: j .LBB51_18 ; ; RV64-LABEL: stest_f16i64_mm: diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -118,56 +118,56 @@ ; CHECK-NEXT: fmv.w.x ft2, a3 ; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: lui a6, 524288 -; CHECK-NEXT: addiw a5, a6, -1 +; CHECK-NEXT: lui a4, 524288 +; CHECK-NEXT: addiw a6, a4, -1 ; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB3_2 +; CHECK-NEXT: blt a2, a6, .LBB3_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: mv a2, a6 ; CHECK-NEXT: .LBB3_2: # %entry ; CHECK-NEXT: fmv.w.x ft1, a1 ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB3_11 +; CHECK-NEXT: bge a3, a6, .LBB3_11 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB3_12 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: bge a1, a6, .LBB3_12 ; CHECK-NEXT: .LBB3_4: # %entry -; CHECK-NEXT: bge a4, a5, .LBB3_13 +; CHECK-NEXT: bge a5, a6, .LBB3_13 ; CHECK-NEXT: .LBB3_5: # %entry -; CHECK-NEXT: bge a6, a4, .LBB3_14 +; CHECK-NEXT: bge a4, a5, .LBB3_14 ; CHECK-NEXT: .LBB3_6: # %entry -; CHECK-NEXT: bge a6, a1, .LBB3_15 +; CHECK-NEXT: bge a4, a1, .LBB3_15 ; CHECK-NEXT: .LBB3_7: # %entry -; CHECK-NEXT: bge a6, a3, .LBB3_16 +; CHECK-NEXT: bge a4, a3, .LBB3_16 ; CHECK-NEXT: .LBB3_8: # %entry -; CHECK-NEXT: blt a6, a2, .LBB3_10 +; CHECK-NEXT: blt a4, a2, .LBB3_10 ; CHECK-NEXT: .LBB3_9: # %entry ; CHECK-NEXT: lui a2, 524288 ; CHECK-NEXT: .LBB3_10: # %entry ; CHECK-NEXT: sw a2, 12(a0) ; CHECK-NEXT: sw a3, 8(a0) ; CHECK-NEXT: sw a1, 4(a0) -; CHECK-NEXT: sw a4, 0(a0) +; CHECK-NEXT: sw 
a5, 0(a0) ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB3_4 +; CHECK-NEXT: mv a3, a6 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: blt a1, a6, .LBB3_4 ; CHECK-NEXT: .LBB3_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB3_5 +; CHECK-NEXT: mv a1, a6 +; CHECK-NEXT: blt a5, a6, .LBB3_5 ; CHECK-NEXT: .LBB3_13: # %entry -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: blt a6, a4, .LBB3_6 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: blt a4, a5, .LBB3_6 ; CHECK-NEXT: .LBB3_14: # %entry -; CHECK-NEXT: lui a4, 524288 -; CHECK-NEXT: blt a6, a1, .LBB3_7 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: blt a4, a1, .LBB3_7 ; CHECK-NEXT: .LBB3_15: # %entry ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: blt a6, a3, .LBB3_8 +; CHECK-NEXT: blt a4, a3, .LBB3_8 ; CHECK-NEXT: .LBB3_16: # %entry ; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: bge a6, a2, .LBB3_9 +; CHECK-NEXT: bge a4, a2, .LBB3_9 ; CHECK-NEXT: j .LBB3_10 entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -311,23 +311,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: 
fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -338,10 +338,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB6_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: bge s0, a4, .LBB6_11 +; CHECK-NEXT: bge s3, a4, .LBB6_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: bge a2, a4, .LBB6_12 @@ -352,16 +352,16 @@ ; CHECK-NEXT: .LBB6_6: # %entry ; CHECK-NEXT: bge a1, a2, .LBB6_15 ; CHECK-NEXT: .LBB6_7: # %entry -; CHECK-NEXT: bge a1, s0, .LBB6_16 +; CHECK-NEXT: bge a1, s3, .LBB6_16 ; CHECK-NEXT: .LBB6_8: # %entry ; CHECK-NEXT: blt a1, a0, .LBB6_10 ; CHECK-NEXT: .LBB6_9: # %entry ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: .LBB6_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a2, 4(s3) -; CHECK-NEXT: sw a3, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a2, 4(s0) +; CHECK-NEXT: sw a3, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -371,7 +371,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_11: # %entry -; CHECK-NEXT: mv s0, a4 +; CHECK-NEXT: mv s3, a4 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: blt a2, a4, .LBB6_4 ; CHECK-NEXT: .LBB6_12: # %entry @@ -385,9 +385,9 @@ ; CHECK-NEXT: blt a1, a2, .LBB6_7 ; CHECK-NEXT: .LBB6_15: # %entry ; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: blt a1, s0, .LBB6_8 +; CHECK-NEXT: blt a1, s3, .LBB6_8 ; CHECK-NEXT: .LBB6_16: # %entry -; CHECK-NEXT: lui s0, 524288 +; CHECK-NEXT: lui s3, 524288 ; CHECK-NEXT: bge a1, a0, .LBB6_9 ; CHECK-NEXT: j .LBB6_10 entry: @@ -418,23 +418,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 0(a1) 
-; CHECK-NEXT: lhu s3, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s3, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -445,10 +445,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB7_7 +; CHECK-NEXT: bgeu s3, a1, .LBB7_7 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bgeu a2, a1, .LBB7_8 @@ -459,7 +459,7 @@ ; CHECK-NEXT: .LBB7_6: # %entry ; CHECK-NEXT: sw a3, 12(s0) ; CHECK-NEXT: sw a2, 8(s0) -; CHECK-NEXT: sw s1, 4(s0) +; CHECK-NEXT: sw s3, 4(s0) ; CHECK-NEXT: sw a0, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -470,7 +470,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_7: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv s3, a1 ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bltu a2, a1, .LBB7_4 ; CHECK-NEXT: .LBB7_8: # %entry @@ -503,23 +503,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; 
CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -530,10 +530,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: .LBB8_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; CHECK-NEXT: bge s0, a3, .LBB8_11 +; CHECK-NEXT: bge s3, a3, .LBB8_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: bge a1, a3, .LBB8_12 @@ -544,16 +544,16 @@ ; CHECK-NEXT: .LBB8_6: # %entry ; CHECK-NEXT: blez a1, .LBB8_15 ; CHECK-NEXT: .LBB8_7: # %entry -; CHECK-NEXT: blez s0, .LBB8_16 +; CHECK-NEXT: blez s3, .LBB8_16 ; CHECK-NEXT: .LBB8_8: # %entry ; CHECK-NEXT: bgtz a0, .LBB8_10 ; CHECK-NEXT: .LBB8_9: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB8_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a1, 4(s3) -; CHECK-NEXT: sw a2, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a1, 4(s0) +; CHECK-NEXT: sw a2, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 
8-byte Folded Reload @@ -563,7 +563,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_11: # %entry -; CHECK-NEXT: mv s0, a3 +; CHECK-NEXT: mv s3, a3 ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: blt a1, a3, .LBB8_4 ; CHECK-NEXT: .LBB8_12: # %entry @@ -577,9 +577,9 @@ ; CHECK-NEXT: bgtz a1, .LBB8_7 ; CHECK-NEXT: .LBB8_15: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s0, .LBB8_8 +; CHECK-NEXT: bgtz s3, .LBB8_8 ; CHECK-NEXT: .LBB8_16: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li s3, 0 ; CHECK-NEXT: blez a0, .LBB8_9 ; CHECK-NEXT: j .LBB8_10 entry: @@ -903,7 +903,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -914,23 +913,19 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -945,81 +940,84 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: 
fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB15_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB15_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB15_4 +; CHECK-NEXT: blt s7, a7, .LBB15_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB15_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB15_6 +; CHECK-NEXT: blt a1, a7, .LBB15_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB15_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB15_8 +; CHECK-NEXT: blt a2, a7, .LBB15_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB15_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB15_10 +; CHECK-NEXT: blt a3, a7, .LBB15_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; 
CHECK-NEXT: .LBB15_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB15_23 +; CHECK-NEXT: bge a4, a7, .LBB15_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB15_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB15_24 ; CHECK-NEXT: .LBB15_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB15_25 +; CHECK-NEXT: bge a6, a7, .LBB15_25 ; CHECK-NEXT: .LBB15_13: # %entry -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: bge s1, s0, .LBB15_26 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: bge a7, a6, .LBB15_26 ; CHECK-NEXT: .LBB15_14: # %entry -; CHECK-NEXT: bge s1, a5, .LBB15_27 +; CHECK-NEXT: bge a7, a5, .LBB15_27 ; CHECK-NEXT: .LBB15_15: # %entry -; CHECK-NEXT: bge s1, a4, .LBB15_28 +; CHECK-NEXT: bge a7, a4, .LBB15_28 ; CHECK-NEXT: .LBB15_16: # %entry -; CHECK-NEXT: bge s1, a3, .LBB15_29 +; CHECK-NEXT: bge a7, a3, .LBB15_29 ; CHECK-NEXT: .LBB15_17: # %entry -; CHECK-NEXT: bge s1, a2, .LBB15_30 +; CHECK-NEXT: bge a7, a2, .LBB15_30 ; CHECK-NEXT: .LBB15_18: # %entry -; CHECK-NEXT: bge s1, a1, .LBB15_31 +; CHECK-NEXT: bge a7, a1, .LBB15_31 ; CHECK-NEXT: .LBB15_19: # %entry -; CHECK-NEXT: bge s1, s9, .LBB15_32 +; CHECK-NEXT: bge a7, s7, .LBB15_32 ; CHECK-NEXT: .LBB15_20: # %entry -; CHECK-NEXT: blt s1, a0, .LBB15_22 +; CHECK-NEXT: blt a7, a0, .LBB15_22 ; CHECK-NEXT: .LBB15_21: # %entry ; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: .LBB15_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) 
# 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1030,41 +1028,40 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB15_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB15_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB15_12 ; CHECK-NEXT: .LBB15_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB15_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB15_13 ; CHECK-NEXT: .LBB15_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: blt s1, s0, .LBB15_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: blt a7, a6, .LBB15_14 ; CHECK-NEXT: .LBB15_26: # %entry -; CHECK-NEXT: lui s0, 1048568 -; CHECK-NEXT: blt s1, a5, .LBB15_15 +; CHECK-NEXT: lui a6, 1048568 +; CHECK-NEXT: blt a7, a5, .LBB15_15 ; CHECK-NEXT: .LBB15_27: # %entry ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt s1, a4, .LBB15_16 +; CHECK-NEXT: blt a7, a4, .LBB15_16 ; CHECK-NEXT: .LBB15_28: # %entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt s1, a3, .LBB15_17 +; CHECK-NEXT: blt a7, a3, .LBB15_17 ; CHECK-NEXT: .LBB15_29: # %entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: blt s1, a2, .LBB15_18 +; CHECK-NEXT: blt a7, a2, .LBB15_18 ; CHECK-NEXT: .LBB15_30: # %entry ; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: blt s1, a1, .LBB15_19 +; CHECK-NEXT: blt a7, a1, .LBB15_19 ; CHECK-NEXT: .LBB15_31: # %entry ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt s1, s9, .LBB15_20 +; CHECK-NEXT: blt a7, s7, .LBB15_20 ; CHECK-NEXT: .LBB15_32: # %entry -; CHECK-NEXT: lui s9, 1048568 -; CHECK-NEXT: bge s1, a0, .LBB15_21 +; CHECK-NEXT: lui s7, 
1048568 +; CHECK-NEXT: bge a7, a0, .LBB15_21 ; CHECK-NEXT: j .LBB15_22 entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -1102,20 +1099,17 @@ ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 ; CHECK-NEXT: lhu s6, 0(a1) -; CHECK-NEXT: lhu s2, 56(a1) -; CHECK-NEXT: lhu s3, 48(a1) -; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s5, 32(a1) -; CHECK-NEXT: lhu s7, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 48(a1) +; CHECK-NEXT: lhu s3, 40(a1) +; CHECK-NEXT: lhu s4, 32(a1) +; CHECK-NEXT: lhu s5, 24(a1) +; CHECK-NEXT: lhu s7, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s8, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 @@ -1131,64 +1125,67 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s8 -; CHECK-NEXT: fcvt.lu.s s8, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: bltu a6, a1, .LBB16_2 +; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addiw a1, a1, -1 +; CHECK-NEXT: bltu a0, a1, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB16_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 +; CHECK-NEXT: fmv.w.x ft1, s5 ; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft0, rtz -; CHECK-NEXT: bltu s8, a1, .LBB16_4 +; 
CHECK-NEXT: bltu s7, a1, .LBB16_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s8, a1 +; CHECK-NEXT: mv s7, a1 ; CHECK-NEXT: .LBB16_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz ; CHECK-NEXT: bltu a2, a1, .LBB16_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB16_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz ; CHECK-NEXT: bltu a3, a1, .LBB16_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB16_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz ; CHECK-NEXT: bltu a4, a1, .LBB16_10 ; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB16_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fmv.w.x ft1, s1 +; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz ; CHECK-NEXT: bgeu a5, a1, .LBB16_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB16_16 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: bgeu a6, a1, .LBB16_16 ; CHECK-NEXT: .LBB16_12: # %entry -; CHECK-NEXT: bltu a0, a1, .LBB16_14 +; CHECK-NEXT: bltu a7, a1, .LBB16_14 ; CHECK-NEXT: .LBB16_13: # %entry -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a7, a1 ; CHECK-NEXT: .LBB16_14: # %entry -; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s1, 12(s0) +; CHECK-NEXT: sh a7, 14(s0) +; CHECK-NEXT: sh a6, 12(s0) ; CHECK-NEXT: sh a5, 10(s0) ; CHECK-NEXT: sh a4, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a2, 4(s0) -; CHECK-NEXT: sh s8, 2(s0) -; CHECK-NEXT: sh a6, 0(s0) +; CHECK-NEXT: sh s7, 2(s0) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1203,11 +1200,11 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB16_15: # %entry ; CHECK-NEXT: mv a5, 
a1 -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: bltu s1, a1, .LBB16_12 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: bltu a6, a1, .LBB16_12 ; CHECK-NEXT: .LBB16_16: # %entry -; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: bgeu a0, a1, .LBB16_13 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: bgeu a7, a1, .LBB16_13 ; CHECK-NEXT: j .LBB16_14 entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -1232,7 +1229,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -1243,23 +1239,19 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -1274,55 +1266,58 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt 
+; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB17_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB17_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB17_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB17_4 +; CHECK-NEXT: blt s7, a7, .LBB17_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB17_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB17_6 +; CHECK-NEXT: blt a1, a7, .LBB17_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB17_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB17_8 +; CHECK-NEXT: blt a2, a7, .LBB17_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB17_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB17_10 +; CHECK-NEXT: blt a3, a7, .LBB17_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB17_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB17_23 +; CHECK-NEXT: bge a4, a7, 
.LBB17_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB17_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB17_24 ; CHECK-NEXT: .LBB17_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB17_25 +; CHECK-NEXT: bge a6, a7, .LBB17_25 ; CHECK-NEXT: .LBB17_13: # %entry -; CHECK-NEXT: blez s0, .LBB17_26 +; CHECK-NEXT: blez a6, .LBB17_26 ; CHECK-NEXT: .LBB17_14: # %entry ; CHECK-NEXT: blez a5, .LBB17_27 ; CHECK-NEXT: .LBB17_15: # %entry @@ -1334,20 +1329,20 @@ ; CHECK-NEXT: .LBB17_18: # %entry ; CHECK-NEXT: blez a1, .LBB17_31 ; CHECK-NEXT: .LBB17_19: # %entry -; CHECK-NEXT: blez s9, .LBB17_32 +; CHECK-NEXT: blez s7, .LBB17_32 ; CHECK-NEXT: .LBB17_20: # %entry ; CHECK-NEXT: bgtz a0, .LBB17_22 ; CHECK-NEXT: .LBB17_21: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB17_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1358,21 +1353,20 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB17_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB17_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB17_12 ; CHECK-NEXT: .LBB17_24: # 
%entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB17_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB17_13 ; CHECK-NEXT: .LBB17_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: bgtz s0, .LBB17_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: bgtz a6, .LBB17_14 ; CHECK-NEXT: .LBB17_26: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: bgtz a5, .LBB17_15 ; CHECK-NEXT: .LBB17_27: # %entry ; CHECK-NEXT: li a5, 0 @@ -1388,9 +1382,9 @@ ; CHECK-NEXT: bgtz a1, .LBB17_19 ; CHECK-NEXT: .LBB17_31: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s9, .LBB17_20 +; CHECK-NEXT: bgtz s7, .LBB17_20 ; CHECK-NEXT: .LBB17_32: # %entry -; CHECK-NEXT: li s9, 0 +; CHECK-NEXT: li s7, 0 ; CHECK-NEXT: blez a0, .LBB17_21 ; CHECK-NEXT: j .LBB17_22 entry: @@ -1418,11 +1412,11 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -1432,7 +1426,7 @@ ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB18_4 ; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB18_5 ; CHECK-NEXT: j .LBB18_6 ; CHECK-NEXT: .LBB18_3: @@ -1443,7 +1437,7 @@ ; CHECK-NEXT: bnez a5, .LBB18_6 ; CHECK-NEXT: .LBB18_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB18_6: # %entry ; CHECK-NEXT: beqz a4, .LBB18_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1453,7 +1447,7 @@ ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB18_12 ; CHECK-NEXT: .LBB18_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB18_13 ; CHECK-NEXT: j .LBB18_14 ; CHECK-NEXT: .LBB18_10: # %entry @@ -1468,13 +1462,13 @@ ; 
CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB18_14 ; CHECK-NEXT: .LBB18_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB18_14: # %entry ; CHECK-NEXT: bnez a1, .LBB18_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB18_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1505,18 +1499,18 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: beqz a1, .LBB19_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB19_2: # %entry -; CHECK-NEXT: beqz s2, .LBB19_4 +; CHECK-NEXT: beqz s1, .LBB19_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB19_4: # %entry @@ -1548,12 +1542,12 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bgtz a1, .LBB20_7 @@ -1584,10 +1578,10 @@ ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB20_4 ; CHECK-NEXT: .LBB20_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB20_5 ; CHECK-NEXT: .LBB20_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB20_6 ; CHECK-NEXT: .LBB20_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -1597,9 +1591,9 @@ ; CHECK-NEXT: 
.LBB20_14: # %entry ; CHECK-NEXT: bnez a1, .LBB20_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB20_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1629,11 +1623,11 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -1643,7 +1637,7 @@ ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB21_4 ; CHECK-NEXT: .LBB21_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB21_5 ; CHECK-NEXT: j .LBB21_6 ; CHECK-NEXT: .LBB21_3: @@ -1654,7 +1648,7 @@ ; CHECK-NEXT: bnez a5, .LBB21_6 ; CHECK-NEXT: .LBB21_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB21_6: # %entry ; CHECK-NEXT: beqz a4, .LBB21_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1664,7 +1658,7 @@ ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB21_12 ; CHECK-NEXT: .LBB21_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB21_13 ; CHECK-NEXT: j .LBB21_14 ; CHECK-NEXT: .LBB21_10: # %entry @@ -1679,13 +1673,13 @@ ; CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB21_14 ; CHECK-NEXT: .LBB21_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB21_14: # %entry ; CHECK-NEXT: bnez a1, .LBB21_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB21_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte 
Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1716,18 +1710,18 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: beqz a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB22_2: # %entry -; CHECK-NEXT: beqz s2, .LBB22_4 +; CHECK-NEXT: beqz s1, .LBB22_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB22_4: # %entry @@ -1759,12 +1753,12 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bgtz a1, .LBB23_7 @@ -1795,10 +1789,10 @@ ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB23_4 ; CHECK-NEXT: .LBB23_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB23_5 ; CHECK-NEXT: .LBB23_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB23_6 ; CHECK-NEXT: .LBB23_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -1808,9 +1802,9 @@ ; CHECK-NEXT: .LBB23_14: # %entry ; CHECK-NEXT: bnez a1, .LBB23_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB23_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1840,12 +1834,12 @@ ; CHECK-NEXT: 
.cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a0 @@ -1856,7 +1850,7 @@ ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB24_4 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB24_5 ; CHECK-NEXT: j .LBB24_6 ; CHECK-NEXT: .LBB24_3: @@ -1867,7 +1861,7 @@ ; CHECK-NEXT: bnez a5, .LBB24_6 ; CHECK-NEXT: .LBB24_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB24_6: # %entry ; CHECK-NEXT: beqz a4, .LBB24_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1877,7 +1871,7 @@ ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB24_12 ; CHECK-NEXT: .LBB24_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB24_13 ; CHECK-NEXT: j .LBB24_14 ; CHECK-NEXT: .LBB24_10: # %entry @@ -1892,13 +1886,13 @@ ; CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB24_14 ; CHECK-NEXT: .LBB24_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB24_14: # %entry ; CHECK-NEXT: bnez a1, .LBB24_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB24_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1929,20 +1923,20 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv 
s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: beqz a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB25_2: # %entry -; CHECK-NEXT: beqz s2, .LBB25_4 +; CHECK-NEXT: beqz s1, .LBB25_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB25_4: # %entry @@ -1974,13 +1968,13 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 @@ -2012,10 +2006,10 @@ ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB26_4 ; CHECK-NEXT: .LBB26_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB26_5 ; CHECK-NEXT: .LBB26_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB26_6 ; CHECK-NEXT: .LBB26_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -2025,9 +2019,9 @@ ; CHECK-NEXT: .LBB26_14: # %entry ; CHECK-NEXT: bnez a1, .LBB26_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB26_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2158,56 +2152,56 @@ ; CHECK-NEXT: fmv.w.x ft2, a3 ; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: lui a6, 524288 -; CHECK-NEXT: addiw a5, a6, -1 +; CHECK-NEXT: lui a4, 524288 +; CHECK-NEXT: addiw a6, a4, -1 ; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; 
CHECK-NEXT: blt a2, a5, .LBB30_2 +; CHECK-NEXT: blt a2, a6, .LBB30_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: mv a2, a6 ; CHECK-NEXT: .LBB30_2: # %entry ; CHECK-NEXT: fmv.w.x ft1, a1 ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB30_11 +; CHECK-NEXT: bge a3, a6, .LBB30_11 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB30_12 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: bge a1, a6, .LBB30_12 ; CHECK-NEXT: .LBB30_4: # %entry -; CHECK-NEXT: bge a4, a5, .LBB30_13 +; CHECK-NEXT: bge a5, a6, .LBB30_13 ; CHECK-NEXT: .LBB30_5: # %entry -; CHECK-NEXT: bge a6, a4, .LBB30_14 +; CHECK-NEXT: bge a4, a5, .LBB30_14 ; CHECK-NEXT: .LBB30_6: # %entry -; CHECK-NEXT: bge a6, a1, .LBB30_15 +; CHECK-NEXT: bge a4, a1, .LBB30_15 ; CHECK-NEXT: .LBB30_7: # %entry -; CHECK-NEXT: bge a6, a3, .LBB30_16 +; CHECK-NEXT: bge a4, a3, .LBB30_16 ; CHECK-NEXT: .LBB30_8: # %entry -; CHECK-NEXT: blt a6, a2, .LBB30_10 +; CHECK-NEXT: blt a4, a2, .LBB30_10 ; CHECK-NEXT: .LBB30_9: # %entry ; CHECK-NEXT: lui a2, 524288 ; CHECK-NEXT: .LBB30_10: # %entry ; CHECK-NEXT: sw a2, 12(a0) ; CHECK-NEXT: sw a3, 8(a0) ; CHECK-NEXT: sw a1, 4(a0) -; CHECK-NEXT: sw a4, 0(a0) +; CHECK-NEXT: sw a5, 0(a0) ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB30_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB30_4 +; CHECK-NEXT: mv a3, a6 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: blt a1, a6, .LBB30_4 ; CHECK-NEXT: .LBB30_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB30_5 +; CHECK-NEXT: mv a1, a6 +; CHECK-NEXT: blt a5, a6, .LBB30_5 ; CHECK-NEXT: .LBB30_13: # %entry -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: blt a6, a4, .LBB30_6 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: blt a4, a5, .LBB30_6 ; CHECK-NEXT: .LBB30_14: # %entry -; CHECK-NEXT: lui a4, 524288 -; CHECK-NEXT: blt a6, a1, .LBB30_7 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: blt a4, a1, .LBB30_7 
; CHECK-NEXT: .LBB30_15: # %entry ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: blt a6, a3, .LBB30_8 +; CHECK-NEXT: blt a4, a3, .LBB30_8 ; CHECK-NEXT: .LBB30_16: # %entry ; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: bge a6, a2, .LBB30_9 +; CHECK-NEXT: bge a4, a2, .LBB30_9 ; CHECK-NEXT: j .LBB30_10 entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -2346,23 +2340,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2373,10 +2367,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB33_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: bge s0, a4, .LBB33_11 +; CHECK-NEXT: bge s3, a4, .LBB33_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: bge a2, a4, .LBB33_12 @@ -2387,16 +2381,16 @@ ; CHECK-NEXT: .LBB33_6: # %entry ; CHECK-NEXT: bge a1, a2, .LBB33_15 ; CHECK-NEXT: .LBB33_7: # %entry -; CHECK-NEXT: bge a1, s0, .LBB33_16 +; 
CHECK-NEXT: bge a1, s3, .LBB33_16 ; CHECK-NEXT: .LBB33_8: # %entry ; CHECK-NEXT: blt a1, a0, .LBB33_10 ; CHECK-NEXT: .LBB33_9: # %entry ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: .LBB33_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a2, 4(s3) -; CHECK-NEXT: sw a3, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a2, 4(s0) +; CHECK-NEXT: sw a3, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -2406,7 +2400,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB33_11: # %entry -; CHECK-NEXT: mv s0, a4 +; CHECK-NEXT: mv s3, a4 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: blt a2, a4, .LBB33_4 ; CHECK-NEXT: .LBB33_12: # %entry @@ -2420,9 +2414,9 @@ ; CHECK-NEXT: blt a1, a2, .LBB33_7 ; CHECK-NEXT: .LBB33_15: # %entry ; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: blt a1, s0, .LBB33_8 +; CHECK-NEXT: blt a1, s3, .LBB33_8 ; CHECK-NEXT: .LBB33_16: # %entry -; CHECK-NEXT: lui s0, 524288 +; CHECK-NEXT: lui s3, 524288 ; CHECK-NEXT: bge a1, a0, .LBB33_9 ; CHECK-NEXT: j .LBB33_10 entry: @@ -2451,23 +2445,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s3, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: 
fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2478,10 +2472,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB34_7 +; CHECK-NEXT: bgeu s3, a1, .LBB34_7 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bgeu a2, a1, .LBB34_8 @@ -2492,7 +2486,7 @@ ; CHECK-NEXT: .LBB34_6: # %entry ; CHECK-NEXT: sw a3, 12(s0) ; CHECK-NEXT: sw a2, 8(s0) -; CHECK-NEXT: sw s1, 4(s0) +; CHECK-NEXT: sw s3, 4(s0) ; CHECK-NEXT: sw a0, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2503,7 +2497,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB34_7: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv s3, a1 ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bltu a2, a1, .LBB34_4 ; CHECK-NEXT: .LBB34_8: # %entry @@ -2535,23 +2529,23 @@ ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 
-; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2562,10 +2556,10 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: .LBB35_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; CHECK-NEXT: bge s0, a3, .LBB35_11 +; CHECK-NEXT: bge s3, a3, .LBB35_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: bge a1, a3, .LBB35_12 @@ -2576,16 +2570,16 @@ ; CHECK-NEXT: .LBB35_6: # %entry ; CHECK-NEXT: blez a1, .LBB35_15 ; CHECK-NEXT: .LBB35_7: # %entry -; CHECK-NEXT: blez s0, .LBB35_16 +; CHECK-NEXT: blez s3, .LBB35_16 ; CHECK-NEXT: .LBB35_8: # %entry ; CHECK-NEXT: bgtz a0, .LBB35_10 ; CHECK-NEXT: .LBB35_9: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB35_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a1, 4(s3) -; CHECK-NEXT: sw a2, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a1, 4(s0) +; CHECK-NEXT: sw a2, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -2595,7 +2589,7 @@ ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB35_11: # %entry -; CHECK-NEXT: mv s0, a3 +; CHECK-NEXT: mv s3, a3 ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: blt a1, a3, .LBB35_4 ; CHECK-NEXT: .LBB35_12: # %entry @@ -2609,9 +2603,9 @@ ; CHECK-NEXT: bgtz a1, .LBB35_7 ; CHECK-NEXT: .LBB35_15: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s0, .LBB35_8 +; CHECK-NEXT: bgtz s3, .LBB35_8 ; CHECK-NEXT: .LBB35_16: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li s3, 0 ; CHECK-NEXT: blez a0, .LBB35_9 ; CHECK-NEXT: j .LBB35_10 entry: @@ -2923,7 +2917,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 
8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -2934,23 +2927,19 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -2965,81 +2954,84 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB42_2 +; CHECK-NEXT: addiw a7, a1, -1 +; 
CHECK-NEXT: blt a0, a7, .LBB42_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB42_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB42_4 +; CHECK-NEXT: blt s7, a7, .LBB42_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB42_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB42_6 +; CHECK-NEXT: blt a1, a7, .LBB42_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB42_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB42_8 +; CHECK-NEXT: blt a2, a7, .LBB42_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB42_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB42_10 +; CHECK-NEXT: blt a3, a7, .LBB42_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB42_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB42_23 +; CHECK-NEXT: bge a4, a7, .LBB42_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB42_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB42_24 ; CHECK-NEXT: .LBB42_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB42_25 +; CHECK-NEXT: bge a6, a7, .LBB42_25 ; CHECK-NEXT: .LBB42_13: # %entry -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: bge s1, s0, .LBB42_26 +; CHECK-NEXT: lui a7, 1048568 +; 
CHECK-NEXT: bge a7, a6, .LBB42_26 ; CHECK-NEXT: .LBB42_14: # %entry -; CHECK-NEXT: bge s1, a5, .LBB42_27 +; CHECK-NEXT: bge a7, a5, .LBB42_27 ; CHECK-NEXT: .LBB42_15: # %entry -; CHECK-NEXT: bge s1, a4, .LBB42_28 +; CHECK-NEXT: bge a7, a4, .LBB42_28 ; CHECK-NEXT: .LBB42_16: # %entry -; CHECK-NEXT: bge s1, a3, .LBB42_29 +; CHECK-NEXT: bge a7, a3, .LBB42_29 ; CHECK-NEXT: .LBB42_17: # %entry -; CHECK-NEXT: bge s1, a2, .LBB42_30 +; CHECK-NEXT: bge a7, a2, .LBB42_30 ; CHECK-NEXT: .LBB42_18: # %entry -; CHECK-NEXT: bge s1, a1, .LBB42_31 +; CHECK-NEXT: bge a7, a1, .LBB42_31 ; CHECK-NEXT: .LBB42_19: # %entry -; CHECK-NEXT: bge s1, s9, .LBB42_32 +; CHECK-NEXT: bge a7, s7, .LBB42_32 ; CHECK-NEXT: .LBB42_20: # %entry -; CHECK-NEXT: blt s1, a0, .LBB42_22 +; CHECK-NEXT: blt a7, a0, .LBB42_22 ; CHECK-NEXT: .LBB42_21: # %entry ; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: .LBB42_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3050,41 +3042,40 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB42_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB42_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, 
.LBB42_12 ; CHECK-NEXT: .LBB42_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB42_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB42_13 ; CHECK-NEXT: .LBB42_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: blt s1, s0, .LBB42_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: blt a7, a6, .LBB42_14 ; CHECK-NEXT: .LBB42_26: # %entry -; CHECK-NEXT: lui s0, 1048568 -; CHECK-NEXT: blt s1, a5, .LBB42_15 +; CHECK-NEXT: lui a6, 1048568 +; CHECK-NEXT: blt a7, a5, .LBB42_15 ; CHECK-NEXT: .LBB42_27: # %entry ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt s1, a4, .LBB42_16 +; CHECK-NEXT: blt a7, a4, .LBB42_16 ; CHECK-NEXT: .LBB42_28: # %entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt s1, a3, .LBB42_17 +; CHECK-NEXT: blt a7, a3, .LBB42_17 ; CHECK-NEXT: .LBB42_29: # %entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: blt s1, a2, .LBB42_18 +; CHECK-NEXT: blt a7, a2, .LBB42_18 ; CHECK-NEXT: .LBB42_30: # %entry ; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: blt s1, a1, .LBB42_19 +; CHECK-NEXT: blt a7, a1, .LBB42_19 ; CHECK-NEXT: .LBB42_31: # %entry ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt s1, s9, .LBB42_20 +; CHECK-NEXT: blt a7, s7, .LBB42_20 ; CHECK-NEXT: .LBB42_32: # %entry -; CHECK-NEXT: lui s9, 1048568 -; CHECK-NEXT: bge s1, a0, .LBB42_21 +; CHECK-NEXT: lui s7, 1048568 +; CHECK-NEXT: bge a7, a0, .LBB42_21 ; CHECK-NEXT: j .LBB42_22 entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -3109,7 +3100,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3120,25 +3110,21 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s5, 
0(a1) -; CHECK-NEXT: lhu s2, 56(a1) -; CHECK-NEXT: lhu s3, 48(a1) -; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s6, 32(a1) -; CHECK-NEXT: lhu s7, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 48(a1) +; CHECK-NEXT: lhu s3, 40(a1) +; CHECK-NEXT: lhu s4, 32(a1) +; CHECK-NEXT: lhu s6, 24(a1) +; CHECK-NEXT: lhu s7, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s8, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s6, a0 @@ -3151,72 +3137,75 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s6 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz ; CHECK-NEXT: fmv.w.x ft0, s8 ; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz -; CHECK-NEXT: sext.w s7, a0 +; CHECK-NEXT: sext.w s6, a0 ; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz -; CHECK-NEXT: sext.w a6, a0 -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: bltu a6, a1, .LBB43_2 +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addiw a1, a1, -1 +; CHECK-NEXT: bltu a0, a1, .LBB43_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB43_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s6 -; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; 
CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: sext.w a2, s1 -; CHECK-NEXT: bltu s7, a1, .LBB43_4 +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz +; CHECK-NEXT: sext.w a2, s7 +; CHECK-NEXT: bltu s6, a1, .LBB43_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s7, a1 +; CHECK-NEXT: mv s6, a1 ; CHECK-NEXT: .LBB43_4: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz -; CHECK-NEXT: sext.w a3, a0 +; CHECK-NEXT: sext.w a3, a3 ; CHECK-NEXT: bltu a2, a1, .LBB43_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB43_6: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: fmv.w.x ft0, s2 +; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz ; CHECK-NEXT: sext.w a4, a4 ; CHECK-NEXT: bltu a3, a1, .LBB43_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB43_8: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz -; CHECK-NEXT: sext.w a5, a0 +; CHECK-NEXT: fmv.w.x ft1, s1 +; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz +; CHECK-NEXT: sext.w a5, a5 ; CHECK-NEXT: bltu a4, a1, .LBB43_10 ; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB43_10: # %entry -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: sext.w s1, s1 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: sext.w a6, a6 ; CHECK-NEXT: bgeu a5, a1, .LBB43_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: bgeu s1, a1, .LBB43_16 +; CHECK-NEXT: sext.w a7, a7 +; CHECK-NEXT: bgeu a6, a1, .LBB43_16 ; CHECK-NEXT: .LBB43_12: # %entry -; CHECK-NEXT: bltu a0, a1, .LBB43_14 +; CHECK-NEXT: bltu a7, a1, .LBB43_14 ; CHECK-NEXT: .LBB43_13: # %entry -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a7, a1 ; CHECK-NEXT: .LBB43_14: # %entry -; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s1, 12(s0) +; CHECK-NEXT: sh a7, 14(s0) +; CHECK-NEXT: sh a6, 12(s0) ; CHECK-NEXT: sh 
a5, 10(s0) ; CHECK-NEXT: sh a4, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a2, 4(s0) -; CHECK-NEXT: sh s7, 2(s0) -; CHECK-NEXT: sh a6, 0(s0) +; CHECK-NEXT: sh s6, 2(s0) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3227,16 +3216,15 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB43_15: # %entry ; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: bltu s1, a1, .LBB43_12 +; CHECK-NEXT: sext.w a7, a7 +; CHECK-NEXT: bltu a6, a1, .LBB43_12 ; CHECK-NEXT: .LBB43_16: # %entry -; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: bgeu a0, a1, .LBB43_13 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: bgeu a7, a1, .LBB43_13 ; CHECK-NEXT: j .LBB43_14 entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -3260,7 +3248,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3271,23 +3258,19 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: 
mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -3302,55 +3285,58 @@ ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB44_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB44_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB44_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB44_4 +; CHECK-NEXT: blt s7, a7, .LBB44_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB44_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB44_6 +; CHECK-NEXT: blt a1, a7, .LBB44_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB44_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; 
CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB44_8 +; CHECK-NEXT: blt a2, a7, .LBB44_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB44_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB44_10 +; CHECK-NEXT: blt a3, a7, .LBB44_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB44_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB44_23 +; CHECK-NEXT: bge a4, a7, .LBB44_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB44_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB44_24 ; CHECK-NEXT: .LBB44_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB44_25 +; CHECK-NEXT: bge a6, a7, .LBB44_25 ; CHECK-NEXT: .LBB44_13: # %entry -; CHECK-NEXT: blez s0, .LBB44_26 +; CHECK-NEXT: blez a6, .LBB44_26 ; CHECK-NEXT: .LBB44_14: # %entry ; CHECK-NEXT: blez a5, .LBB44_27 ; CHECK-NEXT: .LBB44_15: # %entry @@ -3362,20 +3348,20 @@ ; CHECK-NEXT: .LBB44_18: # %entry ; CHECK-NEXT: blez a1, .LBB44_31 ; CHECK-NEXT: .LBB44_19: # %entry -; CHECK-NEXT: blez s9, .LBB44_32 +; CHECK-NEXT: blez s7, .LBB44_32 ; CHECK-NEXT: .LBB44_20: # %entry ; CHECK-NEXT: bgtz a0, .LBB44_22 ; CHECK-NEXT: .LBB44_21: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB44_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 
88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3386,21 +3372,20 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB44_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB44_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB44_12 ; CHECK-NEXT: .LBB44_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB44_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB44_13 ; CHECK-NEXT: .LBB44_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: bgtz s0, .LBB44_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: bgtz a6, .LBB44_14 ; CHECK-NEXT: .LBB44_26: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: bgtz a5, .LBB44_15 ; CHECK-NEXT: .LBB44_27: # %entry ; CHECK-NEXT: li a5, 0 @@ -3416,9 +3401,9 @@ ; CHECK-NEXT: bgtz a1, .LBB44_19 ; CHECK-NEXT: .LBB44_31: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s9, .LBB44_20 +; CHECK-NEXT: bgtz s7, .LBB44_20 ; CHECK-NEXT: .LBB44_32: # %entry -; CHECK-NEXT: li s9, 0 +; CHECK-NEXT: li s7, 0 ; CHECK-NEXT: blez a0, .LBB44_21 ; CHECK-NEXT: j .LBB44_22 entry: @@ -3559,12 +3544,12 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunsdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3581,7 +3566,7 @@ ; CHECK-NEXT: .LBB46_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB46_6 ; CHECK-NEXT: 
.LBB46_5: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB46_6: # %entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3593,7 +3578,7 @@ ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB46_4 ; CHECK-NEXT: .LBB46_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB46_5 ; CHECK-NEXT: j .LBB46_6 entry: @@ -3616,11 +3601,11 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3673,11 +3658,11 @@ ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB47_5 ; CHECK-NEXT: .LBB47_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB47_6 ; CHECK-NEXT: .LBB47_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB47_7 ; CHECK-NEXT: .LBB47_18: # %entry @@ -3827,12 +3812,12 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunssfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3849,7 +3834,7 @@ ; CHECK-NEXT: .LBB49_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB49_6 ; CHECK-NEXT: .LBB49_5: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB49_6: # %entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ 
-3861,7 +3846,7 @@ ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB49_4 ; CHECK-NEXT: .LBB49_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB49_5 ; CHECK-NEXT: j .LBB49_6 entry: @@ -3884,11 +3869,11 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3941,11 +3926,11 @@ ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB50_5 ; CHECK-NEXT: .LBB50_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB50_6 ; CHECK-NEXT: .LBB50_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB50_7 ; CHECK-NEXT: .LBB50_18: # %entry @@ -4097,13 +4082,13 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv a2, a0 @@ -4121,7 +4106,7 @@ ; CHECK-NEXT: .LBB52_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB52_6 ; CHECK-NEXT: .LBB52_5: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB52_6: # %entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -4133,7 +4118,7 @@ ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB52_4 ; CHECK-NEXT: .LBB52_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, 
a1 ; CHECK-NEXT: bne s1, a4, .LBB52_5 ; CHECK-NEXT: j .LBB52_6 entry: @@ -4156,12 +4141,12 @@ ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 @@ -4215,11 +4200,11 @@ ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB53_5 ; CHECK-NEXT: .LBB53_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB53_6 ; CHECK-NEXT: .LBB53_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB53_7 ; CHECK-NEXT: .LBB53_18: # %entry diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -31,13 +31,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -57,13 +57,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; 
RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -97,13 +97,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -123,13 +123,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -163,13 +163,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ 
-189,13 +189,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -229,13 +229,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -255,13 +255,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -461,27 +461,27 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; 
RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lui a1, 1048568 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: slli a1, s2, 17 +; RV32I-NEXT: slli a1, s1, 17 ; RV32I-NEXT: srli a1, a1, 17 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -500,27 +500,27 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a1 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: lui a1, 1048568 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: slli a1, s2, 49 +; RV64I-NEXT: slli a1, s1, 49 ; RV64I-NEXT: srli a1, a1, 49 ; 
RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -562,25 +562,25 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt @@ -599,25 +599,25 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: slli a0, a0, 33 ; 
RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt @@ -654,13 +654,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -680,13 +680,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -722,13 +722,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -748,13 +748,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 
8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -791,20 +791,20 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt @@ -824,20 +824,20 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; 
RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt @@ -875,33 +875,32 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -909,7 +908,6 @@ ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; 
@@ -921,33 +919,32 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -955,7 +952,6 @@ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %c_ = fadd half 0.0, %c ; avoid negation using xor @@ -990,44 +986,44 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a2 -; 
RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1048,44 +1044,44 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a2 +; 
RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1130,44 +1126,44 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 16 
-; RV32I-NEXT: addi s1, a0, -1 -; RV32I-NEXT: and a0, a1, s1 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1188,44 +1184,44 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s1, a0, -1 -; RV64I-NEXT: and 
a0, a1, s1 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1267,32 +1263,31 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; 
RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1300,7 +1295,6 @@ ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1312,32 +1306,31 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call 
__gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1345,7 +1338,6 @@ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %a_ = fadd half 0.0, %a @@ -1377,33 +1369,32 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, 
s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1411,7 +1402,6 @@ ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1423,33 +1413,32 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; 
RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1457,7 +1446,6 @@ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %b_ = fadd half 0.0, %b @@ -1485,26 +1473,26 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1523,26 +1511,26 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; 
RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1580,32 +1568,32 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; 
RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1624,32 +1612,32 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1692,50 +1680,49 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: 
mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1743,7 +1730,6 @@ ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw 
s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1755,50 +1741,49 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 
+; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1806,7 +1791,6 @@ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %a_ = fadd half 0.0, %a ; avoid negation using xor @@ -1843,39 +1827,39 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call 
__gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1894,39 +1878,39 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte 
Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -336,17 +336,17 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s2, a1, -1 -; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: and a0, a0, s0 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __fixunssfsi@plt ; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: bltz s0, .LBB3_2 +; RV32I-NEXT: bltz s2, .LBB3_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB3_2: # %start @@ -356,9 +356,9 @@ ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB3_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s0, s3 ; RV32I-NEXT: .LBB3_4: # %start -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -376,17 +376,17 @@ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s2, a1, -1 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __fixunssfdi@plt ; RV64I-NEXT: li s3, 0 -; RV64I-NEXT: bltz s0, .LBB3_2 +; RV64I-NEXT: bltz s2, .LBB3_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB3_2: # %start @@ -396,9 +396,9 @@ ; RV64I-NEXT: call __gtsf2@plt ; 
RV64I-NEXT: bgtz a0, .LBB3_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s0, s3 ; RV64I-NEXT: .LBB3_4: # %start -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1378,25 +1378,25 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: bltz s1, .LBB12_2 +; RV32I-NEXT: bltz s2, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: .LBB12_2: # %start ; RV32I-NEXT: lui a0, 391168 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: li s2, -1 ; RV32I-NEXT: li s3, -1 -; RV32I-NEXT: li s4, -1 ; RV32I-NEXT: bgtz a0, .LBB12_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: mv s3, s5 ; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 @@ -1404,17 +1404,17 @@ ; RV32I-NEXT: li s5, 0 ; RV32I-NEXT: bltz a0, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s2 +; RV32I-NEXT: mv s5, s1 ; RV32I-NEXT: .LBB12_6: # %start ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s3, s5 +; RV32I-NEXT: mv s2, s5 ; RV32I-NEXT: .LBB12_8: # %start -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -2507,12 +2507,12 @@ ; RV32I-NEXT: mv s0, a0 ; 
RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: lui s3, 1048568 -; RV32I-NEXT: bltz s2, .LBB32_2 +; RV32I-NEXT: bltz s1, .LBB32_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB32_2: # %start @@ -2520,7 +2520,7 @@ ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB32_4 +; RV32I-NEXT: bge s2, a0, .LBB32_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s3, a0, -1 @@ -2528,11 +2528,11 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB32_6 +; RV32I-NEXT: bne a0, s2, .LBB32_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB32_6: # %start -; RV32I-NEXT: slli a0, s1, 16 +; RV32I-NEXT: slli a0, s2, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -2556,12 +2556,12 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB32_2 +; RV64I-NEXT: bltz s1, .LBB32_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB32_2: # %start @@ -2569,7 +2569,7 @@ ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB32_4 +; RV64I-NEXT: bge s2, a0, .LBB32_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -2577,11 +2577,11 @@ ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB32_6 +; 
RV64I-NEXT: bne a0, s2, .LBB32_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; RV64I-NEXT: .LBB32_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2909,31 +2909,31 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 798720 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li s3, -128 -; RV32I-NEXT: bltz s2, .LBB36_2 +; RV32I-NEXT: bltz s1, .LBB36_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB36_2: # %start ; RV32I-NEXT: lui a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s2, 127 -; RV32I-NEXT: blt s1, a0, .LBB36_4 +; RV32I-NEXT: li s1, 127 +; RV32I-NEXT: blt s2, a0, .LBB36_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s1, s3 ; RV32I-NEXT: .LBB36_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB36_6 +; RV32I-NEXT: bne a0, s2, .LBB36_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s2 +; RV32I-NEXT: mv s2, s1 ; RV32I-NEXT: .LBB36_6: # %start -; RV32I-NEXT: slli a0, s1, 24 +; RV32I-NEXT: slli a0, s2, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -2957,31 +2957,31 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 798720 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB36_2 +; RV64I-NEXT: bltz s1, .LBB36_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: 
.LBB36_2: # %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB36_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB36_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB36_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB36_6 +; RV64I-NEXT: bne a0, s2, .LBB36_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; RV64I-NEXT: .LBB36_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -398,24 +398,24 @@ ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: call sinf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call cosf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: and a0, s1, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded 
Reload @@ -433,24 +433,24 @@ ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: call sinf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call cosf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: and a0, s1, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -523,13 +523,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -549,13 +549,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; 
RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -982,20 +982,20 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt @@ -1015,20 +1015,20 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt @@ -1074,26 +1074,26 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 
16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1112,26 +1112,26 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: 
call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1213,13 +1213,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -1239,13 +1239,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -1291,13 +1291,13 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; 
RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -1317,13 +1317,13 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1141,50 +1141,50 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-LABEL: muli128_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a2, 4(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a3, 8(a1) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: srli a6, a2, 20 -; RV32I-NEXT: slli a5, a3, 12 -; RV32I-NEXT: or a6, a5, a6 -; RV32I-NEXT: srli a7, a2, 24 -; RV32I-NEXT: slli a5, a3, 8 -; RV32I-NEXT: or a7, a5, a7 -; RV32I-NEXT: sltu t0, a7, a6 -; RV32I-NEXT: srli t1, a3, 20 -; RV32I-NEXT: slli a5, a1, 12 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: srli a3, a3, 24 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sub t2, a1, a5 +; RV32I-NEXT: lw a6, 0(a1) +; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: srli a1, a4, 20 -; RV32I-NEXT: slli a3, a2, 12 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: srli a1, a4, 24 -; RV32I-NEXT: slli a2, a2, 8 -; RV32I-NEXT: or a5, a2, a1 -; RV32I-NEXT: slli t1, a4, 12 -; RV32I-NEXT: slli t3, a4, 8 -; RV32I-NEXT: sltu t4, t3, t1 -; RV32I-NEXT: sub t0, t2, t0 -; RV32I-NEXT: mv a2, t4 +; RV32I-NEXT: slli a2, a3, 12 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a4, 24 +; RV32I-NEXT: slli a7, a3, 
8 +; RV32I-NEXT: or a2, a7, a2 +; RV32I-NEXT: sltu t0, a2, a1 +; RV32I-NEXT: srli a7, a3, 20 +; RV32I-NEXT: slli t1, a5, 12 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: sub t1, a3, a7 +; RV32I-NEXT: srli a3, a6, 20 +; RV32I-NEXT: slli a5, a4, 12 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a5, a4, a5 +; RV32I-NEXT: slli a4, a6, 12 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: sltu a7, a6, a4 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: mv t1, a7 ; RV32I-NEXT: beq a5, a3, .LBB30_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, a5, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB30_2: -; RV32I-NEXT: sub a1, a7, a6 -; RV32I-NEXT: sltu a4, a1, a2 -; RV32I-NEXT: sub a4, t0, a4 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a2, a5, a3 -; RV32I-NEXT: sub a2, a2, t4 -; RV32I-NEXT: sub a3, t3, t1 -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sltu a2, a1, t1 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m3840: @@ -1192,56 +1192,54 @@ ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a6, 12(a1) -; RV32IM-NEXT: lw a7, 8(a1) +; RV32IM-NEXT: lw a2, 12(a1) +; RV32IM-NEXT: lw a3, 8(a1) ; RV32IM-NEXT: lw a4, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) ; RV32IM-NEXT: lui a5, 1048575 ; RV32IM-NEXT: addi a5, a5, 256 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: mul a3, a1, a5 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: sltu t0, a2, a3 -; 
RV32IM-NEXT: mulhu a3, a1, a5 -; RV32IM-NEXT: add t5, a3, t0 -; RV32IM-NEXT: sub t0, a2, a4 -; RV32IM-NEXT: neg t4, a4 -; RV32IM-NEXT: sltu t1, t0, t4 +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: mul a7, a1, a5 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: sltu a7, a6, a7 +; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: add a7, t0, a7 +; RV32IM-NEXT: sub a6, a6, a4 +; RV32IM-NEXT: neg t0, a4 +; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 ; RV32IM-NEXT: mulhu t3, a4, t2 -; RV32IM-NEXT: add a2, t3, t1 -; RV32IM-NEXT: add t1, t5, a2 -; RV32IM-NEXT: sub a3, t1, a1 -; RV32IM-NEXT: mul a2, a7, a5 -; RV32IM-NEXT: sub a2, a2, a4 -; RV32IM-NEXT: add t6, a3, a2 -; RV32IM-NEXT: sltu s2, t6, a3 +; RV32IM-NEXT: add t1, t3, t1 +; RV32IM-NEXT: add t1, a7, t1 +; RV32IM-NEXT: sub t4, t1, a1 +; RV32IM-NEXT: mul t5, a3, a5 +; RV32IM-NEXT: sub t5, t5, a4 +; RV32IM-NEXT: add t6, t4, t5 +; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a1 -; RV32IM-NEXT: sltu a3, a3, s1 -; RV32IM-NEXT: sltu s1, t1, t5 -; RV32IM-NEXT: mulhu s0, a1, t2 -; RV32IM-NEXT: add s1, s0, s1 -; RV32IM-NEXT: add a3, s1, a3 -; RV32IM-NEXT: sltu a2, a2, t4 -; RV32IM-NEXT: mul s1, a6, a5 -; RV32IM-NEXT: mulhu s0, a7, a5 -; RV32IM-NEXT: sub s0, s0, a7 -; RV32IM-NEXT: add s1, s0, s1 -; RV32IM-NEXT: sub s0, t3, a4 -; RV32IM-NEXT: sub a1, s0, a1 -; RV32IM-NEXT: add a1, a1, s1 +; RV32IM-NEXT: sltu t4, t4, s1 +; RV32IM-NEXT: sltu a7, t1, a7 +; RV32IM-NEXT: mulhu t1, a1, t2 +; RV32IM-NEXT: add a7, t1, a7 +; RV32IM-NEXT: add a7, a7, t4 +; RV32IM-NEXT: sltu t0, t5, t0 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: mulhu t1, a3, a5 +; RV32IM-NEXT: sub a3, t1, a3 +; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: sub a3, t3, a4 +; RV32IM-NEXT: sub a1, a3, a1 ; RV32IM-NEXT: add a1, a1, a2 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: add a1, a1, s2 +; RV32IM-NEXT: add a1, a1, t0 +; RV32IM-NEXT: add a1, a7, a1 +; RV32IM-NEXT: add a1, a1, s0 ; RV32IM-NEXT: mul a2, a4, a5 ; RV32IM-NEXT: sw a2, 0(a0) -; RV32IM-NEXT: 
sw t0, 4(a0) +; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 16 ; RV32IM-NEXT: ret ; @@ -1279,39 +1277,39 @@ ; RV32I-LABEL: muli128_m63: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: slli a6, a2, 6 -; RV32I-NEXT: sltu a7, a2, a6 -; RV32I-NEXT: srli a1, a2, 26 -; RV32I-NEXT: slli a5, a3, 6 -; RV32I-NEXT: or t2, a5, a1 -; RV32I-NEXT: mv t3, a7 -; RV32I-NEXT: beq a3, t2, .LBB31_2 +; RV32I-NEXT: slli a1, a2, 6 +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: srli a6, a2, 26 +; RV32I-NEXT: slli t0, a3, 6 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: beq a3, a6, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t3, a3, t2 +; RV32I-NEXT: sltu t0, a3, a6 ; RV32I-NEXT: .LBB31_2: ; RV32I-NEXT: srli t1, a3, 26 -; RV32I-NEXT: slli a1, a4, 6 -; RV32I-NEXT: or a1, a1, t1 -; RV32I-NEXT: sub a5, a4, a1 -; RV32I-NEXT: sltu t1, a5, t3 -; RV32I-NEXT: sltu t4, a4, a1 -; RV32I-NEXT: srli a4, a4, 26 -; RV32I-NEXT: slli a1, t0, 6 -; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a1, a1, t4 -; RV32I-NEXT: sub a1, a1, t1 -; RV32I-NEXT: sub a4, a5, t3 -; RV32I-NEXT: sub a3, a3, t2 -; RV32I-NEXT: sub a3, a3, a7 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: slli t2, a7, 6 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: sub t2, a7, t1 +; RV32I-NEXT: sltu t3, t2, t0 +; RV32I-NEXT: sltu t1, a7, t1 +; RV32I-NEXT: srli a7, a7, 26 +; RV32I-NEXT: slli t4, a5, 6 +; RV32I-NEXT: or a7, t4, a7 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a5, a5, t1 +; RV32I-NEXT: sub a5, a5, t3 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: sub a3, a3, a6 +; RV32I-NEXT: sub a3, 
a3, a4 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a7, 8(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m63: @@ -1319,55 +1317,55 @@ ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a7, 12(a1) +; RV32IM-NEXT: lw a2, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw t5, 8(a1) -; RV32IM-NEXT: li a6, -63 -; RV32IM-NEXT: mulhu a5, a3, a6 -; RV32IM-NEXT: slli a2, a4, 6 -; RV32IM-NEXT: sub a2, a2, a4 -; RV32IM-NEXT: sub a5, a5, a2 -; RV32IM-NEXT: neg a2, a2 -; RV32IM-NEXT: sltu t0, a5, a2 -; RV32IM-NEXT: mulhu a2, a4, a6 -; RV32IM-NEXT: add t4, a2, t0 -; RV32IM-NEXT: sub t0, a5, a3 -; RV32IM-NEXT: neg t1, a3 -; RV32IM-NEXT: sltu a5, t0, t1 +; RV32IM-NEXT: lw a1, 8(a1) +; RV32IM-NEXT: li a5, -63 +; RV32IM-NEXT: mulhu a6, a3, a5 +; RV32IM-NEXT: slli a7, a4, 6 +; RV32IM-NEXT: sub a7, a7, a4 +; RV32IM-NEXT: sub a6, a6, a7 +; RV32IM-NEXT: neg a7, a7 +; RV32IM-NEXT: sltu a7, a6, a7 +; RV32IM-NEXT: mulhu t0, a4, a5 +; RV32IM-NEXT: add a7, t0, a7 +; RV32IM-NEXT: sub a6, a6, a3 +; RV32IM-NEXT: neg t0, a3 +; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 ; RV32IM-NEXT: mulhu t3, a3, t2 -; RV32IM-NEXT: add a5, t3, a5 -; RV32IM-NEXT: add a5, t4, a5 -; RV32IM-NEXT: sub a2, a5, a4 -; RV32IM-NEXT: slli a1, t5, 6 -; RV32IM-NEXT: sub a1, a1, t5 -; RV32IM-NEXT: add a1, a1, a3 -; RV32IM-NEXT: sub t6, a2, a1 -; RV32IM-NEXT: sltu s0, t6, a2 +; RV32IM-NEXT: add t1, t3, t1 +; RV32IM-NEXT: add t1, a7, t1 +; RV32IM-NEXT: sub t4, t1, a4 +; RV32IM-NEXT: slli t5, a1, 6 +; RV32IM-NEXT: sub t5, t5, a1 +; RV32IM-NEXT: add t5, t5, a3 +; RV32IM-NEXT: sub t6, t4, t5 +; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a4 -; RV32IM-NEXT: sltu a2, a2, s1 -; RV32IM-NEXT: sltu a5, a5, t4 -; RV32IM-NEXT: mulhu s1, a4, t2 -; 
RV32IM-NEXT: add a5, s1, a5 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: slli a5, a7, 6 -; RV32IM-NEXT: sub a5, a7, a5 -; RV32IM-NEXT: mulhu s1, t5, a6 -; RV32IM-NEXT: sub s1, s1, t5 -; RV32IM-NEXT: add a5, s1, a5 -; RV32IM-NEXT: sub s1, t3, a3 -; RV32IM-NEXT: sub a4, s1, a4 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: neg a1, a1 -; RV32IM-NEXT: sltu a1, a1, t1 -; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: sltu t4, t4, s1 +; RV32IM-NEXT: sltu a7, t1, a7 +; RV32IM-NEXT: mulhu t1, a4, t2 +; RV32IM-NEXT: add a7, t1, a7 +; RV32IM-NEXT: add a7, a7, t4 +; RV32IM-NEXT: slli t1, a2, 6 +; RV32IM-NEXT: sub a2, a2, t1 +; RV32IM-NEXT: mulhu a5, a1, a5 +; RV32IM-NEXT: sub a1, a5, a1 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: sub a2, t3, a3 +; RV32IM-NEXT: sub a2, a2, a4 ; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: neg a2, t5 +; RV32IM-NEXT: sltu a2, a2, t0 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 ; RV32IM-NEXT: slli a2, a3, 6 ; RV32IM-NEXT: sub a2, a3, a2 ; RV32IM-NEXT: sw a2, 0(a0) -; RV32IM-NEXT: sw t0, 4(a0) +; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload @@ -1417,60 +1415,60 @@ ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s5, a2 -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: srai s4, a3, 31 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add s1, a0, s1 -; RV32I-NEXT: sltu a0, s1, a0 +; RV32I-NEXT: 
add s5, a0, s5 +; RV32I-NEXT: sltu a0, s5, a0 ; RV32I-NEXT: add s7, a1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add a2, a0, s1 +; RV32I-NEXT: add a2, a0, s5 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add s8, s7, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s9, a0 +; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: mv s6, a1 -; RV32I-NEXT: add s1, a0, s8 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: add s9, a0, s8 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s4 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add a3, a0, s0 -; RV32I-NEXT: add a2, s1, a3 -; RV32I-NEXT: sltu a4, a2, s1 -; RV32I-NEXT: sltu a5, s1, s9 -; RV32I-NEXT: sltu s1, s8, s7 -; RV32I-NEXT: add s1, s6, s1 -; RV32I-NEXT: add a5, s1, a5 -; RV32I-NEXT: add a1, a1, s5 +; RV32I-NEXT: add a3, a0, s2 +; RV32I-NEXT: add a2, s9, a3 +; RV32I-NEXT: sltu a4, a2, s9 +; RV32I-NEXT: sltu a5, s9, s5 +; RV32I-NEXT: sltu a6, s8, s7 +; RV32I-NEXT: add a6, s6, a6 +; RV32I-NEXT: add a5, a6, a5 +; RV32I-NEXT: add a1, a1, s3 ; RV32I-NEXT: sltu a0, a3, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add a0, a5, a0 @@ -1492,36 +1490,36 @@ ; ; RV32IM-LABEL: mulhsu_i64: ; RV32IM: # %bb.0: -; RV32IM-NEXT: srai a7, a3, 31 -; RV32IM-NEXT: mulhu a6, a0, a2 -; RV32IM-NEXT: mul a5, a1, a2 -; RV32IM-NEXT: add a4, a5, a6 -; RV32IM-NEXT: sltu a5, a4, a5 +; RV32IM-NEXT: srai 
a4, a3, 31 +; RV32IM-NEXT: mulhu a5, a0, a2 +; RV32IM-NEXT: mul a6, a1, a2 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: sltu a6, a5, a6 ; RV32IM-NEXT: mulhu a2, a1, a2 -; RV32IM-NEXT: add a6, a2, a5 +; RV32IM-NEXT: add a6, a2, a6 ; RV32IM-NEXT: mul a2, a0, a3 -; RV32IM-NEXT: add a4, a2, a4 -; RV32IM-NEXT: sltu a2, a4, a2 -; RV32IM-NEXT: mulhu a4, a0, a3 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: add a4, a6, a2 -; RV32IM-NEXT: mul a5, a1, a3 -; RV32IM-NEXT: add a2, a5, a4 -; RV32IM-NEXT: mul t1, a7, a0 -; RV32IM-NEXT: add t0, a2, t1 -; RV32IM-NEXT: sltu t2, t0, a2 -; RV32IM-NEXT: sltu a2, a2, a5 -; RV32IM-NEXT: sltu a4, a4, a6 +; RV32IM-NEXT: add a5, a2, a5 +; RV32IM-NEXT: sltu a2, a5, a2 +; RV32IM-NEXT: mulhu a5, a0, a3 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: add a5, a6, a2 +; RV32IM-NEXT: mul a7, a1, a3 +; RV32IM-NEXT: add t0, a7, a5 +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: add a2, t0, t1 +; RV32IM-NEXT: sltu t2, a2, t0 +; RV32IM-NEXT: sltu a7, t0, a7 +; RV32IM-NEXT: sltu a5, a5, a6 ; RV32IM-NEXT: mulhu a3, a1, a3 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: mul a1, a7, a1 -; RV32IM-NEXT: mulhu a0, a7, a0 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: add a3, a3, a7 +; RV32IM-NEXT: mul a1, a4, a1 +; RV32IM-NEXT: mulhu a0, a4, a0 ; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: add a0, a0, t1 -; RV32IM-NEXT: add a0, a2, a0 +; RV32IM-NEXT: add a0, a3, a0 ; RV32IM-NEXT: add a1, a0, t2 -; RV32IM-NEXT: mv a0, t0 +; RV32IM-NEXT: mv a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: mulhsu_i64: diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll --- a/llvm/test/CodeGen/RISCV/remat.ll +++ b/llvm/test/CodeGen/RISCV/remat.ll @@ -37,16 +37,16 @@ ; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui s6, %hi(a) -; RV32I-NEXT: lw a0, %lo(a)(s6) +; RV32I-NEXT: lui s0, %hi(a) +; RV32I-NEXT: lw a0, 
%lo(a)(s0) ; RV32I-NEXT: beqz a0, .LBB0_11 ; RV32I-NEXT: # %bb.1: # %for.body.preheader -; RV32I-NEXT: lui s2, %hi(l) -; RV32I-NEXT: lui s3, %hi(k) -; RV32I-NEXT: lui s4, %hi(j) -; RV32I-NEXT: lui s5, %hi(i) -; RV32I-NEXT: lui s1, %hi(d) -; RV32I-NEXT: lui s0, %hi(e) +; RV32I-NEXT: lui s1, %hi(l) +; RV32I-NEXT: lui s2, %hi(k) +; RV32I-NEXT: lui s3, %hi(j) +; RV32I-NEXT: lui s4, %hi(i) +; RV32I-NEXT: lui s5, %hi(d) +; RV32I-NEXT: lui s6, %hi(e) ; RV32I-NEXT: lui s7, %hi(f) ; RV32I-NEXT: lui s8, %hi(g) ; RV32I-NEXT: lui s9, %hi(h) @@ -55,56 +55,56 @@ ; RV32I-NEXT: j .LBB0_3 ; RV32I-NEXT: .LBB0_2: # %for.inc ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(a)(s6) +; RV32I-NEXT: lw a0, %lo(a)(s0) ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: sw a0, %lo(a)(s6) +; RV32I-NEXT: sw a0, %lo(a)(s0) ; RV32I-NEXT: beqz a0, .LBB0_11 ; RV32I-NEXT: .LBB0_3: # %for.body ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: lw a1, %lo(l)(s2) +; RV32I-NEXT: lw a1, %lo(l)(s1) ; RV32I-NEXT: beqz a1, .LBB0_5 ; RV32I-NEXT: # %bb.4: # %if.then ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a1, %lo(b)(s11) ; RV32I-NEXT: lw a2, %lo(c)(s10) -; RV32I-NEXT: lw a3, %lo(d)(s1) -; RV32I-NEXT: lw a4, %lo(e)(s0) +; RV32I-NEXT: lw a3, %lo(d)(s5) +; RV32I-NEXT: lw a4, %lo(e)(s6) ; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_5: # %if.end ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(k)(s3) +; RV32I-NEXT: lw a0, %lo(k)(s2) ; RV32I-NEXT: beqz a0, .LBB0_7 ; RV32I-NEXT: # %bb.6: # %if.then3 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a0, %lo(b)(s11) ; RV32I-NEXT: lw a1, %lo(c)(s10) -; RV32I-NEXT: lw a2, %lo(d)(s1) -; RV32I-NEXT: lw a3, %lo(e)(s0) +; RV32I-NEXT: lw a2, %lo(d)(s5) +; RV32I-NEXT: lw a3, %lo(e)(s6) ; RV32I-NEXT: lw a4, %lo(f)(s7) ; RV32I-NEXT: li a5, 64 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_7: # %if.end5 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw 
a0, %lo(j)(s4) +; RV32I-NEXT: lw a0, %lo(j)(s3) ; RV32I-NEXT: beqz a0, .LBB0_9 ; RV32I-NEXT: # %bb.8: # %if.then7 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a0, %lo(c)(s10) -; RV32I-NEXT: lw a1, %lo(d)(s1) -; RV32I-NEXT: lw a2, %lo(e)(s0) +; RV32I-NEXT: lw a1, %lo(d)(s5) +; RV32I-NEXT: lw a2, %lo(e)(s6) ; RV32I-NEXT: lw a3, %lo(f)(s7) ; RV32I-NEXT: lw a4, %lo(g)(s8) ; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_9: # %if.end9 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(i)(s5) +; RV32I-NEXT: lw a0, %lo(i)(s4) ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.10: # %if.then11 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(d)(s1) -; RV32I-NEXT: lw a1, %lo(e)(s0) +; RV32I-NEXT: lw a0, %lo(d)(s5) +; RV32I-NEXT: lw a1, %lo(e)(s6) ; RV32I-NEXT: lw a2, %lo(f)(s7) ; RV32I-NEXT: lw a3, %lo(g)(s8) ; RV32I-NEXT: lw a4, %lo(h)(s9) diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll --- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll +++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll @@ -19,18 +19,18 @@ ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __divsf3@plt @@ -51,18 +51,18 @@ ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; 
RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __divsf3@plt diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -183,18 +183,18 @@ ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, a2 +; RV32I-NEXT: sll a5, a4, a2 ; RV32I-NEXT: bnez a3, .LBB7_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB7_4: ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: not a5, a2 -; RV32I-NEXT: srl a1, a1, a5 -; RV32I-NEXT: or a3, a6, a1 +; RV32I-NEXT: not a6, a2 +; RV32I-NEXT: srl a1, a1, a6 +; RV32I-NEXT: or a3, a5, a1 ; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: srli a1, a4, 1 -; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: srl a1, a1, a6 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: ret @@ -208,18 +208,18 @@ ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a4, a0 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: sll a6, a4, a2 +; RV32ZBB-NEXT: sll a5, a4, a2 ; RV32ZBB-NEXT: bnez a3, .LBB7_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv a0, a1 ; RV32ZBB-NEXT: .LBB7_4: ; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: not a5, a2 -; RV32ZBB-NEXT: srl a1, a1, a5 -; RV32ZBB-NEXT: or a3, a6, a1 +; RV32ZBB-NEXT: not a6, a2 +; RV32ZBB-NEXT: srl a1, a1, a6 +; RV32ZBB-NEXT: or a3, a5, a1 ; RV32ZBB-NEXT: sll a0, a0, a2 ; RV32ZBB-NEXT: srli a1, a4, 1 
-; RV32ZBB-NEXT: srl a1, a1, a5 +; RV32ZBB-NEXT: srl a1, a1, a6 ; RV32ZBB-NEXT: or a1, a0, a1 ; RV32ZBB-NEXT: mv a0, a3 ; RV32ZBB-NEXT: ret @@ -233,18 +233,18 @@ ; RV32ZBP-NEXT: # %bb.1: ; RV32ZBP-NEXT: mv a4, a0 ; RV32ZBP-NEXT: .LBB7_2: -; RV32ZBP-NEXT: sll a6, a4, a2 +; RV32ZBP-NEXT: sll a5, a4, a2 ; RV32ZBP-NEXT: bnez a3, .LBB7_4 ; RV32ZBP-NEXT: # %bb.3: ; RV32ZBP-NEXT: mv a0, a1 ; RV32ZBP-NEXT: .LBB7_4: ; RV32ZBP-NEXT: srli a1, a0, 1 -; RV32ZBP-NEXT: not a5, a2 -; RV32ZBP-NEXT: srl a1, a1, a5 -; RV32ZBP-NEXT: or a3, a6, a1 +; RV32ZBP-NEXT: not a6, a2 +; RV32ZBP-NEXT: srl a1, a1, a6 +; RV32ZBP-NEXT: or a3, a5, a1 ; RV32ZBP-NEXT: sll a0, a0, a2 ; RV32ZBP-NEXT: srli a1, a4, 1 -; RV32ZBP-NEXT: srl a1, a1, a5 +; RV32ZBP-NEXT: srl a1, a1, a6 ; RV32ZBP-NEXT: or a1, a0, a1 ; RV32ZBP-NEXT: mv a0, a3 ; RV32ZBP-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -73,8 +73,8 @@ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -88,14 +88,14 @@ ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s1, a1, 819 -; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -103,12 +103,12 @@ ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: 
and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s0, a1, 257 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, s4, 1 -; RV32I-NEXT: or a0, s4, a0 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: srli a0, s2, 1 +; RV32I-NEXT: or a0, s2, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,24 +119,24 @@ ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s3, .LBB1_2 +; RV32I-NEXT: bnez s0, .LBB1_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB1_3 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s1, 24 ; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -227,21 +227,21 @@ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; 
RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -249,32 +249,32 @@ ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB3_2 +; RV32I-NEXT: bnez s2, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -356,17 +356,17 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: addi s2, a2, 1365 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: lui a1, 209715 -; 
RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s3, a1, 819 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -378,12 +378,12 @@ ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3@plt ; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, s0, 1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: sub a0, s0, a0 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -39,18 +39,18 @@ ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc1_i64: @@ -102,18 +102,18 @@ ; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: lui a3, 
209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2_i64: @@ -181,34 +181,34 @@ ; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 2 ; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc3_i64: @@ -266,18 +266,18 @@ ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; 
RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc4_i64: @@ -345,34 +345,34 @@ ; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc5_i64: @@ -446,34 
+446,34 @@ ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 2 ; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc6_i64: @@ -563,50 +563,50 @@ ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: 
or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a2, a1, 2 ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 2 ; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc7_i64: @@ -670,18 +670,18 @@ ; RV32I-NEXT: slli a3, a1, 8 ; RV32I-NEXT: lui a4, 1044496 ; RV32I-NEXT: addi a4, a4, -256 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 8 ; RV32I-NEXT: srli a5, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: addi a3, a3, 255 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: addi a6, a6, 255 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: 
and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc8_i64: @@ -830,30 +830,30 @@ ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 -; RV32I-NEXT: and a7, a2, a4 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: srli a3, a1, 2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 -; RV32I-NEXT: and a3, a3, a2 -; RV32I-NEXT: and a5, a5, a2 +; RV32I-NEXT: srli a6, a1, 2 +; RV32I-NEXT: lui a7, 209715 +; RV32I-NEXT: addi a7, a7, 819 +; RV32I-NEXT: and a6, a6, a7 +; RV32I-NEXT: and a5, a5, a7 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: slli a3, a0, 2 -; RV32I-NEXT: slli a5, a1, 2 -; RV32I-NEXT: and a6, a5, a4 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 -; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: and a5, a5, a7 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2b_i64: @@ -941,46 +941,46 @@ ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 -; RV32I-NEXT: and a7, a2, a4 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 -; RV32I-NEXT: and a3, a3, a2 -; 
RV32I-NEXT: and a5, a5, a2 +; RV32I-NEXT: srli a6, a0, 1 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a6, a6, a7 +; RV32I-NEXT: and a5, a5, a7 ; RV32I-NEXT: or a1, a5, a1 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: slli a6, a1, 2 -; RV32I-NEXT: slli a5, a0, 2 -; RV32I-NEXT: lui a3, 838861 -; RV32I-NEXT: addi a3, a3, -820 -; RV32I-NEXT: and a7, a5, a3 -; RV32I-NEXT: and a6, a6, a3 -; RV32I-NEXT: srli t0, a0, 2 -; RV32I-NEXT: srli a3, a1, 2 -; RV32I-NEXT: lui a5, 209715 -; RV32I-NEXT: addi a5, a5, 819 +; RV32I-NEXT: or a0, a6, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a2, a1, 2 +; RV32I-NEXT: slli a3, a0, 2 +; RV32I-NEXT: lui a5, 838861 +; RV32I-NEXT: addi a5, a5, -820 ; RV32I-NEXT: and a3, a3, a5 -; RV32I-NEXT: and a5, t0, a5 +; RV32I-NEXT: and a2, a2, a5 +; RV32I-NEXT: srli a5, a0, 2 +; RV32I-NEXT: srli a6, a1, 2 +; RV32I-NEXT: lui t0, 209715 +; RV32I-NEXT: addi t0, t0, 819 +; RV32I-NEXT: and a6, a6, t0 +; RV32I-NEXT: and a5, a5, t0 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: slli a3, a0, 1 -; RV32I-NEXT: slli a5, a1, 1 -; RV32I-NEXT: and a6, a5, a4 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 -; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: and a5, a5, a7 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc3b_i64: @@ -1818,20 +1818,20 @@ ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: or a0, a2, 
a0 ; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: slli a6, a1, 2 +; RV32I-NEXT: slli a2, a1, 2 ; RV32I-NEXT: slli a3, a0, 2 -; RV32I-NEXT: lui a2, 838861 -; RV32I-NEXT: addi a2, a2, -820 -; RV32I-NEXT: and a7, a3, a2 -; RV32I-NEXT: and a2, a6, a2 +; RV32I-NEXT: lui a6, 838861 +; RV32I-NEXT: addi a6, a6, -820 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a1, a1, 2 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: or a0, a7, a0 +; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: slli a2, a0, 1 ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: and a3, a3, a4 @@ -1945,40 +1945,40 @@ ; RV32I-NEXT: and a1, a1, a5 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: slli a6, a0, 2 +; RV32I-NEXT: slli a2, a0, 2 ; RV32I-NEXT: slli a3, a1, 2 -; RV32I-NEXT: lui a2, 838861 -; RV32I-NEXT: addi a2, a2, -820 -; RV32I-NEXT: and a7, a3, a2 -; RV32I-NEXT: and a6, a6, a2 +; RV32I-NEXT: lui a6, 838861 +; RV32I-NEXT: addi a6, a6, -820 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: or t0, a6, a0 -; RV32I-NEXT: or a1, a7, a1 -; RV32I-NEXT: slli a6, a1, 1 -; RV32I-NEXT: slli a0, t0, 1 -; RV32I-NEXT: and a7, a0, a4 -; RV32I-NEXT: and a4, a6, a4 +; RV32I-NEXT: lui a7, 209715 +; RV32I-NEXT: addi a7, a7, 819 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: slli a2, a1, 1 +; RV32I-NEXT: slli a3, a0, 1 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: srli a0, t0, 
1 +; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: or a0, a7, a0 -; RV32I-NEXT: slli a4, a0, 2 -; RV32I-NEXT: slli a5, a1, 2 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: grev0_i64: @@ -2275,13 +2275,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: srli a2, a1, 8 ; RV32I-NEXT: lui a3, 16 -; RV32I-NEXT: addi a7, a3, -256 -; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: slli a4, a1, 8 -; RV32I-NEXT: lui a6, 4080 -; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: lui a5, 4080 +; RV32I-NEXT: and a4, a4, a5 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: or a1, a1, a2 @@ -2293,27 +2293,27 @@ ; RV32I-NEXT: slli a1, a1, 4 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: srli a2, a1, 2 -; RV32I-NEXT: lui a5, 209715 -; RV32I-NEXT: addi a5, a5, 819 -; RV32I-NEXT: and a2, a2, a5 -; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: srli a2, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: and a1, a1, a7 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: 
or t0, a2, a1 +; RV32I-NEXT: or a2, a2, a1 ; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: and a1, a1, a7 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: and a3, a3, a5 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: and a1, a1, a4 @@ -2321,16 +2321,16 @@ ; RV32I-NEXT: slli a0, a0, 4 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: and a0, a0, a6 ; RV32I-NEXT: slli a0, a0, 2 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a1, a1, a0 -; RV32I-NEXT: mv a0, t0 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: bitreverse_i64: @@ -2462,13 +2462,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: srli a3, a1, 8 ; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a7, a2, -256 -; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a3, a3, a2 ; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: or a4, a3, a4 ; RV32I-NEXT: slli a5, a1, 8 -; RV32I-NEXT: lui a6, 4080 -; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a5, a5, a3 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a4 @@ -2480,58 +2480,58 @@ ; RV32I-NEXT: slli a1, a1, 4 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a4, a4, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a1, 
a1, a6 ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 -; RV32I-NEXT: and a4, a4, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a1, a1, a7 ; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a0, 8 -; RV32I-NEXT: and t0, a4, a7 -; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: or t0, t0, a4 -; RV32I-NEXT: slli a4, a0, 8 -; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli t0, a0, 24 +; RV32I-NEXT: or a4, a4, t0 +; RV32I-NEXT: slli t0, a0, 8 +; RV32I-NEXT: and t0, t0, a3 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: srli a4, a0, 4 ; RV32I-NEXT: and a4, a4, a5 ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: slli a0, a0, 4 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: srli a4, a0, 2 -; RV32I-NEXT: and a4, a4, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a0, a0, a6 ; RV32I-NEXT: slli a0, a0, 2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: and a3, a3, a2 -; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a4, a0, 1 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: srli a2, a0, 8 -; RV32I-NEXT: and a2, a2, a7 -; RV32I-NEXT: srli a3, a0, 24 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: slli a3, a0, 8 -; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: and a5, a5, a3 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srli a2, a1, 8 -; RV32I-NEXT: and a2, a2, a7 -; RV32I-NEXT: srli a3, a1, 24 -; 
RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: slli a3, a1, 8 -; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: and a3, a4, a3 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a1, a1, a2 @@ -2584,16 +2584,16 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 629146 ; RV32I-NEXT: addi a2, a2, -1639 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: slli a5, a0, 1 -; RV32I-NEXT: lui a3, 279620 -; RV32I-NEXT: addi a3, a3, 1092 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: or a3, a6, a5 +; RV32I-NEXT: lui a6, 279620 +; RV32I-NEXT: addi a6, a6, 1092 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: srli a1, a1, 1 ; RV32I-NEXT: lui a4, 139810 @@ -2656,16 +2656,16 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 801852 ; RV32I-NEXT: addi a2, a2, 963 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 ; RV32I-NEXT: slli a4, a1, 2 ; RV32I-NEXT: slli a5, a0, 2 -; RV32I-NEXT: lui a3, 197379 -; RV32I-NEXT: addi a3, a3, 48 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: or a3, a6, a5 +; RV32I-NEXT: lui a6, 197379 +; RV32I-NEXT: addi a6, a6, 48 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 ; RV32I-NEXT: lui a4, 49345 @@ -2728,24 +2728,24 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 983295 ; RV32I-NEXT: addi a2, a2, 15 -; RV32I-NEXT: and a6, a1, a2 +; RV32I-NEXT: and a3, a1, a2 ; RV32I-NEXT: and a2, a0, a2 ; 
RV32I-NEXT: slli a4, a1, 4 ; RV32I-NEXT: slli a5, a0, 4 -; RV32I-NEXT: lui a3, 61441 -; RV32I-NEXT: addi a3, a3, -256 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: lui a6, 61441 +; RV32I-NEXT: addi a6, a6, -256 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 ; RV32I-NEXT: srli a1, a1, 4 ; RV32I-NEXT: srli a0, a0, 4 -; RV32I-NEXT: lui a4, 3840 -; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: and a1, a1, a4 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 3840 +; RV32I-NEXT: addi a6, a6, 240 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: shfl4_i64: @@ -2799,22 +2799,22 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 1044480 ; RV32I-NEXT: addi a2, a2, 255 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 ; RV32I-NEXT: slli a4, a0, 8 ; RV32I-NEXT: slli a5, a1, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: lui a4, 16 -; RV32I-NEXT: addi a4, a4, -256 -; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: lui a6, 16 +; RV32I-NEXT: addi a6, a6, -256 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll --- a/llvm/test/CodeGen/RISCV/rv32zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll @@ -341,14 +341,14 @@ ; RV32I-LABEL: fshl_i64: ; RV32I: # %bb.0: ; RV32I-NEXT: srli a5, a4, 5 -; 
RV32I-NEXT: andi a5, a5, 1 -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: bnez a5, .LBB13_2 +; RV32I-NEXT: andi a6, a5, 1 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: bnez a6, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: mv a5, a0 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: sll a7, a6, a4 -; RV32I-NEXT: bnez a5, .LBB13_4 +; RV32I-NEXT: sll a7, a5, a4 +; RV32I-NEXT: bnez a6, .LBB13_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB13_4: @@ -356,12 +356,12 @@ ; RV32I-NEXT: not a3, a4 ; RV32I-NEXT: srl a2, a2, a3 ; RV32I-NEXT: or a2, a7, a2 -; RV32I-NEXT: bnez a5, .LBB13_6 +; RV32I-NEXT: bnez a6, .LBB13_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB13_6: ; RV32I-NEXT: sll a0, a0, a4 -; RV32I-NEXT: srli a1, a6, 1 +; RV32I-NEXT: srli a1, a5, 1 ; RV32I-NEXT: srl a1, a1, a3 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 @@ -420,24 +420,24 @@ ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a6, a2, a4 +; RV32I-NEXT: srl a2, a2, a4 ; RV32I-NEXT: beqz a5, .LBB15_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: .LBB15_4: ; RV32I-NEXT: slli a7, a3, 1 -; RV32I-NEXT: not t0, a4 -; RV32I-NEXT: sll a2, a7, t0 -; RV32I-NEXT: or a6, a2, a6 +; RV32I-NEXT: not a6, a4 +; RV32I-NEXT: sll a7, a7, a6 +; RV32I-NEXT: or a2, a7, a2 ; RV32I-NEXT: srl a3, a3, a4 ; RV32I-NEXT: beqz a5, .LBB15_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB15_6: ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: sll a0, a0, a6 ; RV32I-NEXT: or a1, a0, a3 -; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32ZBT-LABEL: fshr_i64: diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll --- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll @@ -13,15 +13,15 @@ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte 
Folded Spill ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; CHECK-NEXT: srli s2, a0, 32 +; CHECK-NEXT: srli s0, a0, 32 ; CHECK-NEXT: srli s1, a1, 32 ; CHECK-NEXT: call __addsf3@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __addsf3@plt ; CHECK-NEXT: slli a0, a0, 32 -; CHECK-NEXT: slli a1, s0, 32 +; CHECK-NEXT: slli a1, s2, 32 ; CHECK-NEXT: srli a1, a1, 32 ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -2408,38 +2408,38 @@ ; RV64I-LABEL: bitreverse_bswap_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: lui a6, 4080 -; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a3, a0, 8 ; RV64I-NEXT: li a4, 255 -; RV64I-NEXT: slli a7, a4, 24 -; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: slli a5, a4, 24 +; RV64I-NEXT: and a3, a3, a5 ; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 40 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a3, a3, a2 -; RV64I-NEXT: srli a5, a0, 56 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: lui a6, 16 +; RV64I-NEXT: addiw a6, a6, -256 +; RV64I-NEXT: and a3, a3, a6 +; RV64I-NEXT: srli a7, a0, 56 +; RV64I-NEXT: or a3, a3, a7 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: slli a3, a0, 24 -; RV64I-NEXT: slli t0, a4, 40 -; RV64I-NEXT: and a3, a3, t0 -; RV64I-NEXT: srliw a5, a0, 24 -; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a5, a0, 40 +; RV64I-NEXT: slli a7, a4, 40 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: srliw t0, a0, 24 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli t0, a0, 40 ; RV64I-NEXT: slli a4, a4, 48 -; RV64I-NEXT: and a5, a5, a4 +; RV64I-NEXT: and t0, t0, a4 ; RV64I-NEXT: 
slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: lui a5, %hi(.LCPI68_0) -; RV64I-NEXT: ld a5, %lo(.LCPI68_0)(a5) +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: lui t0, %hi(.LCPI68_0) +; RV64I-NEXT: ld t0, %lo(.LCPI68_0)(t0) ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: and a1, a1, a5 -; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: and a1, a1, t0 +; RV64I-NEXT: and a0, a0, t0 ; RV64I-NEXT: lui a3, %hi(.LCPI68_1) ; RV64I-NEXT: ld a3, %lo(.LCPI68_1)(a3) ; RV64I-NEXT: slli a0, a0, 4 @@ -2457,17 +2457,17 @@ ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 56 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 24 -; RV64I-NEXT: and a2, a2, a6 +; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: srli a3, a0, 8 -; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: and a3, a3, a5 ; RV64I-NEXT: or a2, a3, a2 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: slli a2, a0, 24 -; RV64I-NEXT: and a2, a2, t0 +; RV64I-NEXT: and a2, a2, a7 ; RV64I-NEXT: srliw a3, a0, 24 ; RV64I-NEXT: slli a3, a3, 32 ; RV64I-NEXT: or a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -103,76 +103,76 @@ ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a6, a2, 3 -; CHECK-NEXT: add a4, a1, a6 +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a4, a1, a3 ; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a3, a3, a4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: slli 
a7, a2, 4 -; CHECK-NEXT: add a5, a1, a7 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a5, a1, a4 ; CHECK-NEXT: vl8re32.v v8, (a5) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 4 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul t1, a2, a5 -; CHECK-NEXT: add a3, a1, t1 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: slli t3, a2, 5 -; CHECK-NEXT: add a4, a1, t3 -; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: li a4, 40 -; CHECK-NEXT: mul a4, a2, a4 -; CHECK-NEXT: add t0, a1, a4 -; CHECK-NEXT: li a5, 48 ; CHECK-NEXT: mul a5, a2, a5 -; CHECK-NEXT: add t2, a1, a5 -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: add a6, a1, a5 +; CHECK-NEXT: vl8re32.v v8, (a6) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 3 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: slli a6, a2, 5 +; CHECK-NEXT: add a7, a1, a6 +; CHECK-NEXT: vl8re32.v v8, (a7) +; CHECK-NEXT: addi a7, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; CHECK-NEXT: li a7, 40 +; CHECK-NEXT: mul a7, a2, a7 +; CHECK-NEXT: add t0, a1, a7 +; CHECK-NEXT: li t1, 48 +; CHECK-NEXT: mul t1, a2, 
t1 +; CHECK-NEXT: add t2, a1, t1 +; CHECK-NEXT: li t3, 56 +; CHECK-NEXT: mul a2, a2, t3 +; CHECK-NEXT: add t3, a1, a2 ; CHECK-NEXT: vl8re32.v v8, (a1) ; CHECK-NEXT: vl8re32.v v0, (t0) -; CHECK-NEXT: vl8re32.v v16, (a3) +; CHECK-NEXT: vl8re32.v v16, (t3) ; CHECK-NEXT: vl8re32.v v24, (t2) ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: add a1, a0, a2 ; CHECK-NEXT: vs8r.v v16, (a1) -; CHECK-NEXT: add a1, a0, a5 +; CHECK-NEXT: add a1, a0, t1 ; CHECK-NEXT: vs8r.v v24, (a1) -; CHECK-NEXT: add a1, a0, a4 +; CHECK-NEXT: add a1, a0, a7 ; CHECK-NEXT: vs8r.v v0, (a1) -; CHECK-NEXT: add a1, a0, t3 +; CHECK-NEXT: add a1, a0, a6 ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a1, a0, t1 +; CHECK-NEXT: add a1, a0, a5 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a1, a0, a7 +; CHECK-NEXT: add a1, a0, a4 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a0, a0, a6 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-v 
-riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM @@ -32,17 +33,17 @@ ; CHECK-ASM-LABEL: gather: ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB0_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vlse8.v v8, (a1), a4 -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 -; CHECK-ASM-NEXT: vse8.v v8, (a3) +; CHECK-ASM-NEXT: vse8.v v8, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB0_1 @@ -101,18 +102,18 @@ ; CHECK-ASM-NEXT: addiw a3, a3, 873 ; CHECK-ASM-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-ASM-NEXT: vmv.s.x v0, a3 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB1_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vmv1r.v v9, v8 ; CHECK-ASM-NEXT: vlse8.v v9, (a1), a4, v0.t -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v10, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v10, (a6) ; CHECK-ASM-NEXT: vadd.vv v9, v10, v9 -; CHECK-ASM-NEXT: vse8.v v9, (a3) +; CHECK-ASM-NEXT: vse8.v v9, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB1_1 @@ -168,17 +169,17 @@ ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 ; CHECK-ASM-NEXT: addi a1, a1, 155 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, -5 ; CHECK-ASM-NEXT: li a5, 
1024 ; CHECK-ASM-NEXT: .LBB2_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vlse8.v v8, (a1), a4 -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 -; CHECK-ASM-NEXT: vse8.v v8, (a3) +; CHECK-ASM-NEXT: vse8.v v8, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB2_1 @@ -303,14 +304,14 @@ ; CHECK-ASM-LABEL: scatter: ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB4_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: add a3, a1, a2 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu -; CHECK-ASM-NEXT: vle8.v v8, (a3) +; CHECK-ASM-NEXT: add a6, a1, a2 +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu +; CHECK-ASM-NEXT: vle8.v v8, (a6) ; CHECK-ASM-NEXT: vlse8.v v9, (a0), a4 ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 ; CHECK-ASM-NEXT: vsse8.v v8, (a0), a4 @@ -369,7 +370,7 @@ ; CHECK-ASM-LABEL: scatter_masked: ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: lui a4, 983765 ; CHECK-ASM-NEXT: addiw a4, a4, 873 ; CHECK-ASM-NEXT: vsetivli zero, 1, e32, mf2, ta, mu @@ -378,9 +379,9 @@ ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB5_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: add a3, a1, a2 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a1, a2 +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vmv1r.v v10, v8 ; 
CHECK-ASM-NEXT: vlse8.v v10, (a0), a4, v0.t ; CHECK-ASM-NEXT: vadd.vv v9, v10, v9 @@ -1047,43 +1048,43 @@ ; CHECK-ASM-NEXT: # %bb.2: ; CHECK-ASM-NEXT: slli a3, a4, 32 ; CHECK-ASM-NEXT: srli a3, a3, 32 -; CHECK-ASM-NEXT: addi a6, a3, 1 -; CHECK-ASM-NEXT: andi a7, a6, -32 -; CHECK-ASM-NEXT: add a3, a7, a2 -; CHECK-ASM-NEXT: slli a4, a2, 2 -; CHECK-ASM-NEXT: add a4, a4, a2 +; CHECK-ASM-NEXT: addi a4, a3, 1 +; CHECK-ASM-NEXT: andi a5, a4, -32 +; CHECK-ASM-NEXT: add a3, a5, a2 +; CHECK-ASM-NEXT: slli a6, a2, 2 +; CHECK-ASM-NEXT: add a6, a6, a2 ; CHECK-ASM-NEXT: add a2, a0, a2 -; CHECK-ASM-NEXT: add a4, a1, a4 -; CHECK-ASM-NEXT: li t0, 32 -; CHECK-ASM-NEXT: li t1, 5 -; CHECK-ASM-NEXT: mv a5, a7 +; CHECK-ASM-NEXT: add a6, a1, a6 +; CHECK-ASM-NEXT: li a7, 32 +; CHECK-ASM-NEXT: li t0, 5 +; CHECK-ASM-NEXT: mv t1, a5 ; CHECK-ASM-NEXT: .LBB12_3: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, t0, e8, m1, ta, mu -; CHECK-ASM-NEXT: vlse8.v v8, (a4), t1 +; CHECK-ASM-NEXT: vsetvli zero, a7, e8, m1, ta, mu +; CHECK-ASM-NEXT: vlse8.v v8, (a6), t0 ; CHECK-ASM-NEXT: vle8.v v9, (a2) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 ; CHECK-ASM-NEXT: vse8.v v8, (a2) -; CHECK-ASM-NEXT: addi a5, a5, -32 +; CHECK-ASM-NEXT: addi t1, t1, -32 ; CHECK-ASM-NEXT: addi a2, a2, 32 -; CHECK-ASM-NEXT: addi a4, a4, 160 -; CHECK-ASM-NEXT: bnez a5, .LBB12_3 +; CHECK-ASM-NEXT: addi a6, a6, 160 +; CHECK-ASM-NEXT: bnez t1, .LBB12_3 ; CHECK-ASM-NEXT: # %bb.4: -; CHECK-ASM-NEXT: beq a6, a7, .LBB12_7 +; CHECK-ASM-NEXT: beq a4, a5, .LBB12_7 ; CHECK-ASM-NEXT: .LBB12_5: ; CHECK-ASM-NEXT: slli a2, a3, 2 ; CHECK-ASM-NEXT: add a2, a2, a3 ; CHECK-ASM-NEXT: add a1, a1, a2 -; CHECK-ASM-NEXT: li a6, 1024 +; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: .LBB12_6: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: lb a4, 0(a1) ; CHECK-ASM-NEXT: add a5, a0, a3 -; CHECK-ASM-NEXT: lb a2, 0(a5) -; CHECK-ASM-NEXT: addw a2, a2, a4 -; CHECK-ASM-NEXT: sb a2, 0(a5) -; CHECK-ASM-NEXT: addiw a2, a3, 1 +; CHECK-ASM-NEXT: 
lb a6, 0(a5) +; CHECK-ASM-NEXT: addw a4, a6, a4 +; CHECK-ASM-NEXT: sb a4, 0(a5) +; CHECK-ASM-NEXT: addiw a4, a3, 1 ; CHECK-ASM-NEXT: addi a3, a3, 1 ; CHECK-ASM-NEXT: addi a1, a1, 5 -; CHECK-ASM-NEXT: bne a2, a6, .LBB12_6 +; CHECK-ASM-NEXT: bne a4, a2, .LBB12_6 ; CHECK-ASM-NEXT: .LBB12_7: ; CHECK-ASM-NEXT: ret %4 = icmp eq i32 %2, 1024 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -866,8 +866,8 @@ ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: lui a6, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV32-NEXT: lui a3, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 @@ -886,10 +886,10 @@ ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a3, 349525 -; LMULMAX1-RV32-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV32-NEXT: lui a6, 349525 +; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 @@ -897,7 +897,7 @@ ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 @@ -912,8 +912,8 @@ ; 
LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) @@ -933,8 +933,8 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a3, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -953,10 +953,10 @@ ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 349525 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: lui a6, 349525 +; LMULMAX1-RV64-NEXT: addiw a6, a6, 1365 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 @@ -964,7 +964,7 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 @@ -979,8 +979,8 @@ ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; 
LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) @@ -1153,23 +1153,23 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v12, 24 -; LMULMAX1-RV32-NEXT: lui a6, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a6 -; LMULMAX1-RV32-NEXT: li a5, 5 +; LMULMAX1-RV32-NEXT: lui a5, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a5 +; LMULMAX1-RV32-NEXT: li a6, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: lui a5, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a5, v0 +; LMULMAX1-RV32-NEXT: lui a6, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a6, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 8 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v9 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 ; LMULMAX1-RV32-NEXT: vor.vv v13, v11, v10 -; LMULMAX1-RV32-NEXT: li a5, 255 +; LMULMAX1-RV32-NEXT: li a6, 255 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a6 ; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsll.vi v11, v12, 8 @@ -1183,7 +1183,7 @@ ; LMULMAX1-RV32-NEXT: vor.vv v14, v15, v14 ; LMULMAX1-RV32-NEXT: vsll.vx v15, v12, a3 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v16, a6 +; LMULMAX1-RV32-NEXT: vmv.v.x v16, a5 ; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16 @@ -1192,30 
+1192,30 @@ ; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v14 ; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 4 -; LMULMAX1-RV32-NEXT: lui a5, 61681 -; LMULMAX1-RV32-NEXT: addi a5, a5, -241 +; LMULMAX1-RV32-NEXT: lui a6, 61681 +; LMULMAX1-RV32-NEXT: addi a6, a6, -241 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v14 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v14 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 4 ; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 2 -; LMULMAX1-RV32-NEXT: lui a5, 209715 -; LMULMAX1-RV32-NEXT: addi a5, a5, 819 +; LMULMAX1-RV32-NEXT: lui a6, 209715 +; LMULMAX1-RV32-NEXT: addi a6, a6, 819 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v15, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v15, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v15 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v15 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 2 ; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 1 -; LMULMAX1-RV32-NEXT: lui a5, 349525 -; LMULMAX1-RV32-NEXT: addi a5, a5, 1365 +; LMULMAX1-RV32-NEXT: lui a6, 349525 +; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v17, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v17, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v17 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v17 @@ -1226,7 +1226,7 @@ ; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a4 ; LMULMAX1-RV32-NEXT: vor.vv v13, v18, v13 ; LMULMAX1-RV32-NEXT: vsrl.vi v18, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a6 +; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a5 ; LMULMAX1-RV32-NEXT: vsrl.vi v19, v8, 8 ; LMULMAX1-RV32-NEXT: 
vand.vv v9, v19, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v18 @@ -1264,99 +1264,99 @@ ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: li t0, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, t0 -; LMULMAX1-RV64-NEXT: li t1, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t1 +; LMULMAX1-RV64-NEXT: li a2, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV64-NEXT: li a3, 40 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV64-NEXT: lui a4, 16 -; LMULMAX1-RV64-NEXT: addiw t2, a4, -256 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 +; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a5, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: li a5, 255 -; LMULMAX1-RV64-NEXT: slli t3, a5, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 +; LMULMAX1-RV64-NEXT: li a6, 255 +; LMULMAX1-RV64-NEXT: slli a7, a6, 24 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: slli t4, a5, 32 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 +; LMULMAX1-RV64-NEXT: slli t0, a6, 32 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: slli a3, a5, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 +; LMULMAX1-RV64-NEXT: slli t1, a6, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; 
LMULMAX1-RV64-NEXT: vsll.vx v12, v9, t0 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t1 -; LMULMAX1-RV64-NEXT: slli a5, a5, 48 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: slli a6, a6, 48 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI5_0) -; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI5_0)(a4) +; LMULMAX1-RV64-NEXT: lui t2, %hi(.LCPI5_0) +; LMULMAX1-RV64-NEXT: ld t2, %lo(.LCPI5_0)(t2) ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI5_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t2 +; LMULMAX1-RV64-NEXT: lui t3, %hi(.LCPI5_1) +; LMULMAX1-RV64-NEXT: ld t3, %lo(.LCPI5_1)(t3) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI5_2) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI5_2)(a2) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t3 +; LMULMAX1-RV64-NEXT: lui t4, %hi(.LCPI5_2) +; LMULMAX1-RV64-NEXT: ld t4, %lo(.LCPI5_2)(t4) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, t0 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, t1 -; 
LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, t0 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t2 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 +; 
LMULMAX1-RV64-NEXT: vand.vx v8, v8, t4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -595,8 +595,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a6, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: li a3, 40 ; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV32-NEXT: lui a4, 16 @@ -606,21 +606,21 @@ ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: li a2, 5 +; LMULMAX1-RV32-NEXT: li a6, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a2, v0 +; LMULMAX1-RV32-NEXT: lui a6, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a6, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: li a2, 255 +; LMULMAX1-RV32-NEXT: li a6, 255 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v11, 
a6 ; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsll.vi v13, v9, 8 @@ -638,11 +638,11 @@ ; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v13 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 @@ -659,7 +659,7 @@ ; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 @@ -670,63 +670,63 @@ ; LMULMAX1-RV64-LABEL: bswap_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi t1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (t1) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a7, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a7 -; LMULMAX1-RV64-NEXT: li t0, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, t0 +; LMULMAX1-RV64-NEXT: li a2, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV64-NEXT: li a3, 40 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 ; LMULMAX1-RV64-NEXT: lui a4, 16 ; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: 
vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a5, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: li a5, 255 -; LMULMAX1-RV64-NEXT: slli a2, a5, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: li a6, 255 +; LMULMAX1-RV64-NEXT: slli a7, a6, 24 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: slli a3, a5, 32 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: slli t0, a6, 32 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: slli a1, a5, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX1-RV64-NEXT: slli t1, a6, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a7 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t0 -; LMULMAX1-RV64-NEXT: slli a5, a5, 48 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: slli a6, a6, 48 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a7 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t0 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: 
vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a7 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t0 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (t1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -1746,8 +1746,8 @@ ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 16 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: li a6, 32 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 32 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) @@ -1763,12 +1763,12 @@ ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) ; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) +; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; 
LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: li a7, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1781,7 +1781,7 @@ ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 16 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1794,7 +1794,7 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -1030,8 +1030,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a6, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: li a2, 1 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1050,10 +1050,10 @@ ; LMULMAX1-RV32-NEXT: lui a5, 1 ; LMULMAX1-RV32-NEXT: addi a5, a5, -241 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: li a2, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: li a6, 257 +; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 ; 
LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 @@ -1066,7 +1066,7 @@ ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) @@ -1078,8 +1078,8 @@ ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 @@ -1098,10 +1098,10 @@ ; LMULMAX1-RV64-NEXT: lui a5, 1 ; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a2, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: li a6, 257 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1114,7 +1114,7 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) @@ -1228,8 +1228,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a6, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: li a2, 1 +; LMULMAX1-RV32-NEXT: 
vsub.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1248,11 +1248,11 @@ ; LMULMAX1-RV32-NEXT: lui a5, 61681 ; LMULMAX1-RV32-NEXT: addi a5, a5, -241 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: lui a6, 4112 +; LMULMAX1-RV32-NEXT: addi a6, a6, 257 +; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 @@ -1265,7 +1265,7 @@ ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) @@ -1277,8 +1277,8 @@ ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 @@ -1297,11 +1297,11 @@ ; LMULMAX1-RV64-NEXT: lui a5, 61681 ; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: lui a6, 4112 +; LMULMAX1-RV64-NEXT: addiw a6, a6, 257 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; 
LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1314,7 +1314,7 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) @@ -1512,8 +1512,8 @@ ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) @@ -1529,15 +1529,15 @@ ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) ; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) +; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: li a7, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1550,7 +1550,7 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; 
LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -27,25 +27,25 @@ ; RV32-LABEL: add_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: lw a2, 8(a0) -; RV32-NEXT: lw a6, 12(a0) +; RV32-NEXT: lw a3, 12(a0) ; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a7, 4(a0) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a5, 0(a1) +; RV32-NEXT: lw a5, 4(a0) +; RV32-NEXT: lw a6, 4(a1) +; RV32-NEXT: lw a7, 0(a1) ; RV32-NEXT: lw t0, 8(a1) ; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: add a3, a7, a3 -; RV32-NEXT: add a5, a4, a5 -; RV32-NEXT: sltu a4, a5, a4 -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: add a1, a6, a1 -; RV32-NEXT: add a4, a2, t0 -; RV32-NEXT: sltu a2, a4, a2 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a6, a4, a7 +; RV32-NEXT: sltu a4, a6, a4 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a3, a2, t0 +; RV32-NEXT: sltu a2, a3, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: sw a4, 8(a0) -; RV32-NEXT: sw a5, 0(a0) +; RV32-NEXT: sw a3, 8(a0) +; RV32-NEXT: sw a6, 0(a0) ; RV32-NEXT: sw a1, 12(a0) -; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a4, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll @@ -63,24 +63,24 @@ ; ; LMULMAX1-LABEL: gather_const_v64f16: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: addi a5, a0, 94 ; LMULMAX1-NEXT: vsetivli 
zero, 8, e16, m1, ta, mu ; LMULMAX1-NEXT: vlse16.v v8, (a5), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse16.v v8, (a2) -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse16.v v8, (a7) +; LMULMAX1-NEXT: vse16.v v8, (a6) ; LMULMAX1-NEXT: vse16.v v8, (a5) ; LMULMAX1-NEXT: vse16.v v8, (a4) ; LMULMAX1-NEXT: vse16.v v8, (a3) -; LMULMAX1-NEXT: vse16.v v8, (a7) +; LMULMAX1-NEXT: vse16.v v8, (a2) ; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a6) +; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <64 x half>, <64 x half>* %x %b = extractelement <64 x half> %a, i32 47 @@ -102,24 +102,24 @@ ; ; LMULMAX1-LABEL: gather_const_v32f32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: addi a5, a0, 68 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-NEXT: vlse32.v v8, (a5), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse32.v v8, (a2) -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse32.v v8, (a7) +; LMULMAX1-NEXT: vse32.v v8, (a6) ; LMULMAX1-NEXT: vse32.v v8, (a5) ; LMULMAX1-NEXT: vse32.v v8, (a4) ; LMULMAX1-NEXT: vse32.v v8, (a3) -; LMULMAX1-NEXT: vse32.v v8, (a7) +; LMULMAX1-NEXT: vse32.v v8, (a2) ; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a6) +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <32 x float>, <32 x float>* %x %b = extractelement <32 x float> %a, i32 17 @@ -140,23 +140,23 @@ ; ; LMULMAX1-LABEL: gather_const_v16f64: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: 
addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-NEXT: vlse64.v v8, (a4), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse64.v v8, (a2) -; LMULMAX1-NEXT: vse64.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse64.v v8, (a7) +; LMULMAX1-NEXT: vse64.v v8, (a6) ; LMULMAX1-NEXT: vse64.v v8, (a5) ; LMULMAX1-NEXT: vse64.v v8, (a4) ; LMULMAX1-NEXT: vse64.v v8, (a3) -; LMULMAX1-NEXT: vse64.v v8, (a7) +; LMULMAX1-NEXT: vse64.v v8, (a2) ; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: vse64.v v8, (a6) +; LMULMAX1-NEXT: vse64.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <16 x double>, <16 x double>* %x %b = extractelement <16 x double> %a, i32 10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -548,16 +548,16 @@ ; RV32-NEXT: andi a3, a2, 1 ; RV32-NEXT: beqz a3, .LBB8_2 ; RV32-NEXT: # %bb.1: # %cond.load -; RV32-NEXT: lbu a6, 1(a0) -; RV32-NEXT: lbu a7, 0(a0) +; RV32-NEXT: lbu a3, 1(a0) +; RV32-NEXT: lbu a4, 0(a0) ; RV32-NEXT: lbu a5, 3(a0) -; RV32-NEXT: lbu a3, 2(a0) -; RV32-NEXT: slli a4, a6, 8 -; RV32-NEXT: or a4, a4, a7 -; RV32-NEXT: slli a5, a5, 8 -; RV32-NEXT: or a3, a5, a3 -; RV32-NEXT: slli a3, a3, 16 +; RV32-NEXT: lbu a6, 2(a0) +; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: or a3, a3, a4 +; RV32-NEXT: slli a4, a5, 8 +; RV32-NEXT: or a4, a4, a6 +; RV32-NEXT: slli a4, a4, 16 +; RV32-NEXT: or a3, a4, a3 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: andi a2, a2, 2 @@ -608,16 +608,16 @@ ; RV64-NEXT: andi a3, a2, 1 ; RV64-NEXT: beqz a3, .LBB8_2 ; 
RV64-NEXT: # %bb.1: # %cond.load -; RV64-NEXT: lbu a6, 1(a0) -; RV64-NEXT: lbu a7, 0(a0) +; RV64-NEXT: lbu a3, 1(a0) +; RV64-NEXT: lbu a4, 0(a0) ; RV64-NEXT: lb a5, 3(a0) -; RV64-NEXT: lbu a3, 2(a0) -; RV64-NEXT: slli a4, a6, 8 -; RV64-NEXT: or a4, a4, a7 -; RV64-NEXT: slli a5, a5, 8 -; RV64-NEXT: or a3, a5, a3 -; RV64-NEXT: slli a3, a3, 16 +; RV64-NEXT: lbu a6, 2(a0) +; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: or a3, a3, a4 +; RV64-NEXT: slli a4, a5, 8 +; RV64-NEXT: or a4, a4, a6 +; RV64-NEXT: slli a4, a4, 16 +; RV64-NEXT: or a3, a4, a3 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vmv.v.x v8, a3 ; RV64-NEXT: andi a2, a2, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -421,18 +421,18 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: bltu a5, a6, .LBB32_4 +; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: bltu a5, a4, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a4, a1, 256 +; CHECK-NEXT: addi a6, a1, 256 ; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: bltu a2, a4, .LBB32_6 +; CHECK-NEXT: vle64.v v16, (a6), v0.t +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: bltu a2, a5, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB32_6: @@ -443,10 +443,10 @@ ; CHECK-NEXT: .LBB32_8: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v24, (a4), v0.t -; CHECK-NEXT: bltu a2, a6, .LBB32_10 +; CHECK-NEXT: vle64.v v24, (a5), v0.t 
+; CHECK-NEXT: bltu a2, a4, .LBB32_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_10: diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -27,13 +27,13 @@ ; CHECK-NEXT: lw a6, 24(s1) ; CHECK-NEXT: lw a7, 20(s1) ; CHECK-NEXT: lw t1, 16(s1) -; CHECK-NEXT: lw t2, 12(s1) -; CHECK-NEXT: lw a1, 8(s1) +; CHECK-NEXT: lw a1, 12(s1) +; CHECK-NEXT: lw t2, 8(s1) ; CHECK-NEXT: sw a0, 52(s1) ; CHECK-NEXT: sw a0, 48(s1) ; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd a1, 16(sp) -; CHECK-NEXT: sd t2, 8(sp) +; CHECK-NEXT: sd t2, 16(sp) +; CHECK-NEXT: sd a1, 8(sp) ; CHECK-NEXT: addi a1, s1, 48 ; CHECK-NEXT: sd t1, 0(sp) ; CHECK-NEXT: mv a0, t0 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -257,33 +257,33 @@ define void @sink_splat_mul_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB7_2 +; CHECK-NEXT: bgeu a3, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB7_5 ; CHECK-NEXT: .LBB7_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB7_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, 
mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB7_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB7_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB7_7 +; CHECK-NEXT: beqz a4, .LBB7_7 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB7_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -350,33 +350,33 @@ define void @sink_splat_add_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB8_2 +; CHECK-NEXT: bgeu a3, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB8_5 ; CHECK-NEXT: .LBB8_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB8_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB8_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, 
a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB8_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB8_7 +; CHECK-NEXT: beqz a4, .LBB8_7 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB8_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -443,33 +443,33 @@ define void @sink_splat_sub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB9_2 +; CHECK-NEXT: bgeu a3, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB9_5 ; CHECK-NEXT: .LBB9_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB9_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB9_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB9_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB9_7 +; CHECK-NEXT: beqz a4, .LBB9_7 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, 
a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB9_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -536,33 +536,33 @@ define void @sink_splat_rsub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB10_2 +; CHECK-NEXT: bgeu a3, a2, .LBB10_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB10_5 ; CHECK-NEXT: .LBB10_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB10_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB10_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB10_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB10_7 +; CHECK-NEXT: beqz a4, .LBB10_7 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB10_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -629,33 +629,33 @@ define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and_scalable: 
; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB11_2 +; CHECK-NEXT: bgeu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB11_5 ; CHECK-NEXT: .LBB11_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB11_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB11_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB11_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB11_7 +; CHECK-NEXT: beqz a4, .LBB11_7 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB11_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -722,33 +722,33 @@ define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB12_2 +; CHECK-NEXT: bgeu a3, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 
+; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB12_5 ; CHECK-NEXT: .LBB12_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB12_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vor.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB12_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB12_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB12_7 +; CHECK-NEXT: beqz a4, .LBB12_7 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB12_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -815,33 +815,33 @@ define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB13_2 +; CHECK-NEXT: bgeu a3, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB13_5 ; CHECK-NEXT: .LBB13_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; 
CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB13_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB13_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB13_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB13_7 +; CHECK-NEXT: beqz a4, .LBB13_7 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB13_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1016,33 +1016,33 @@ define void @sink_splat_shl_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_shl_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB17_2 +; CHECK-NEXT: bgeu a3, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB17_5 ; CHECK-NEXT: .LBB17_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB17_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, 
e32, m2, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB17_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB17_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB17_7 +; CHECK-NEXT: beqz a4, .LBB17_7 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB17_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1109,33 +1109,33 @@ define void @sink_splat_lshr_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB18_2 +; CHECK-NEXT: bgeu a3, a2, .LBB18_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB18_5 ; CHECK-NEXT: .LBB18_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB18_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB18_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne 
a5, a3, .LBB18_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB18_7 +; CHECK-NEXT: beqz a4, .LBB18_7 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB18_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1202,30 +1202,30 @@ define void @sink_splat_ashr_scalable(i32* nocapture %a) { ; CHECK-LABEL: sink_splat_ashr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a7, a3, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a1, a5, 1 ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a7, .LBB19_2 +; CHECK-NEXT: bgeu a2, a1, .LBB19_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB19_5 ; CHECK-NEXT: .LBB19_2: # %vector.ph ; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: remu a6, a2, a7 -; CHECK-NEXT: sub a2, a2, a6 -; CHECK-NEXT: slli a5, a3, 1 -; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: remu a3, a2, a1 +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB19_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a3) -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: vsra.vi v8, v8, 2 -; CHECK-NEXT: vs2r.v v8, (a3) -; CHECK-NEXT: add a4, a4, a7 -; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: vs2r.v v8, (a6) +; CHECK-NEXT: add a4, a4, a1 +; CHECK-NEXT: add a6, a6, a5 ; CHECK-NEXT: bne a4, a2, .LBB19_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB19_7 +; CHECK-NEXT: beqz a3, .LBB19_7 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader ; CHECK-NEXT: addi a1, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 @@ -1517,30 +1517,30 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul_scalable: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB26_2 +; CHECK-NEXT: bgeu a6, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB26_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB26_7 +; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1610,30 +1610,30 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB27_2 +; CHECK-NEXT: bgeu a6, a3, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: 
sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB27_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB27_7 +; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1703,30 +1703,30 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB28_2 +; CHECK-NEXT: bgeu a6, a3, .LBB28_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB28_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; 
CHECK-NEXT: beqz a6, .LBB28_7 +; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1796,30 +1796,30 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB29_2 +; CHECK-NEXT: bgeu a6, a3, .LBB29_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB29_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB29_7 +; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1889,30 +1889,30 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, 
.LBB30_2 +; CHECK-NEXT: bgeu a6, a3, .LBB30_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB30_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB30_7 +; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1982,30 +1982,30 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB31_2 +; CHECK-NEXT: bgeu a6, a3, .LBB31_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; 
CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB31_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB31_7 +; CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -2159,36 +2159,36 @@ define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli t1, a7, 2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: li t0, 1024 ; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, t1, .LBB34_2 +; CHECK-NEXT: bgeu t0, a4, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: remu a6, t0, t1 -; CHECK-NEXT: sub t0, t0, a6 +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: remu a5, t0, a4 +; CHECK-NEXT: sub a2, t0, a5 ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a2, a0, a5 -; CHECK-NEXT: vl1re32.v v8, (a2) -; CHECK-NEXT: add a4, a1, a5 -; CHECK-NEXT: vl1re32.v v9, (a4) -; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu +; CHECK-NEXT: add t0, a0, a6 +; CHECK-NEXT: vl1re32.v v8, (t0) +; CHECK-NEXT: add t1, a1, a6 +; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, ft0, v8 -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: add a3, a3, t1 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: bne a3, t0, .LBB34_3 +; CHECK-NEXT: vs1r.v v9, (t0) +; CHECK-NEXT: add a7, a7, a4 +; CHECK-NEXT: add a6, a6, a3 +; 
CHECK-NEXT: bne a7, a2, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB34_7 +; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: addi a3, t0, -1024 -; CHECK-NEXT: slli a2, t0, 2 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB34_6: # %for.body @@ -2263,36 +2263,36 @@ define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli t1, a7, 2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: li t0, 1024 ; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, t1, .LBB35_2 +; CHECK-NEXT: bgeu t0, a4, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: remu a6, t0, t1 -; CHECK-NEXT: sub t0, t0, a6 +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: remu a5, t0, a4 +; CHECK-NEXT: sub a2, t0, a5 ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a2, a0, a5 -; CHECK-NEXT: vl1re32.v v8, (a2) -; CHECK-NEXT: add a4, a1, a5 -; CHECK-NEXT: vl1re32.v v9, (a4) -; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu +; CHECK-NEXT: add t0, a0, a6 +; CHECK-NEXT: vl1re32.v v8, (t0) +; CHECK-NEXT: add t1, a1, a6 +; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, ft0, v8 -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: add a3, a3, t1 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: bne a3, t0, .LBB35_3 +; CHECK-NEXT: vs1r.v v9, (t0) +; CHECK-NEXT: add a7, a7, a4 +; CHECK-NEXT: add a6, a6, a3 +; CHECK-NEXT: bne a7, a2, .LBB35_3 ; CHECK-NEXT: # %bb.4: # 
%middle.block -; CHECK-NEXT: beqz a6, .LBB35_7 +; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: addi a3, t0, -1024 -; CHECK-NEXT: slli a2, t0, 2 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB35_6: # %for.body @@ -2593,33 +2593,33 @@ define void @sink_splat_udiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB42_2 +; CHECK-NEXT: bgeu a3, a2, .LBB42_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB42_5 ; CHECK-NEXT: .LBB42_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB42_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB42_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB42_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB42_7 +; CHECK-NEXT: beqz a4, .LBB42_7 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB42_6: # 
%for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2686,33 +2686,33 @@ define void @sink_splat_sdiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB43_2 +; CHECK-NEXT: bgeu a3, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB43_5 ; CHECK-NEXT: .LBB43_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB43_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB43_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB43_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB43_7 +; CHECK-NEXT: beqz a4, .LBB43_7 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB43_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2779,33 +2779,33 @@ define void @sink_splat_urem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 
+; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB44_2 +; CHECK-NEXT: bgeu a3, a2, .LBB44_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB44_5 ; CHECK-NEXT: .LBB44_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB44_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB44_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB44_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB44_7 +; CHECK-NEXT: beqz a4, .LBB44_7 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB44_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2872,33 +2872,33 @@ define void @sink_splat_srem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB45_2 +; CHECK-NEXT: bgeu a3, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB45_5 ; CHECK-NEXT: .LBB45_2: # 
%vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB45_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB45_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB45_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB45_7 +; CHECK-NEXT: beqz a4, .LBB45_7 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB45_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -495,48 +495,48 @@ ; CHECK-LABEL: vpload_nxv17f64: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a7, a3, 1 +; CHECK-NEXT: slli a5, a3, 1 ; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: mv t0, a2 -; CHECK-NEXT: bltu a2, a7, .LBB38_2 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a5, .LBB38_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv t0, a7 +; CHECK-NEXT: mv a4, a5 ; CHECK-NEXT: .LBB38_2: -; CHECK-NEXT: sub a5, t0, a3 +; CHECK-NEXT: sub a7, a4, a3 ; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: bltu t0, a5, .LBB38_4 +; CHECK-NEXT: bltu a4, a7, .LBB38_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a6, a5 +; 
CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: .LBB38_4: -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: srli t1, a3, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, mu -; CHECK-NEXT: vslidedown.vx v0, v8, t1 -; CHECK-NEXT: slli a4, a3, 3 -; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: srli t0, a3, 3 +; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v0, v8, t0 +; CHECK-NEXT: slli t0, a3, 3 +; CHECK-NEXT: add t0, a0, t0 ; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v16, (a4), v0.t +; CHECK-NEXT: vle64.v v16, (t0), v0.t ; CHECK-NEXT: srli a6, a3, 2 -; CHECK-NEXT: sub a4, a2, a7 -; CHECK-NEXT: slli a7, a3, 4 -; CHECK-NEXT: bltu a2, a4, .LBB38_6 +; CHECK-NEXT: sub t0, a2, a5 +; CHECK-NEXT: slli a5, a3, 4 +; CHECK-NEXT: bltu a2, t0, .LBB38_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: mv a7, t0 ; CHECK-NEXT: .LBB38_6: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-NEXT: add a2, a0, a7 -; CHECK-NEXT: bltu a5, a3, .LBB38_8 +; CHECK-NEXT: add a2, a0, a5 +; CHECK-NEXT: bltu a7, a3, .LBB38_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv a5, a3 +; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB38_8: -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, mu ; CHECK-NEXT: vle64.v v24, (a2), v0.t -; CHECK-NEXT: bltu t0, a3, .LBB38_10 +; CHECK-NEXT: bltu a4, a3, .LBB38_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: mv t0, a3 +; CHECK-NEXT: mv a4, a3 ; CHECK-NEXT: .LBB38_10: -; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: vs1r.v v24, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -408,50 +408,50 @@ ; CHECK-NEXT: slli a3, a3, 3 ; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: 
csrr a3, vlenb -; CHECK-NEXT: slli a6, a3, 1 +; CHECK-NEXT: slli a4, a3, 1 ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: mv a5, a2 -; CHECK-NEXT: bltu a2, a6, .LBB31_2 +; CHECK-NEXT: bltu a2, a4, .LBB31_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: .LBB31_2: -; CHECK-NEXT: mv a4, a5 +; CHECK-NEXT: mv a7, a5 ; CHECK-NEXT: bltu a5, a3, .LBB31_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB31_4: -; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, mu ; CHECK-NEXT: sub a0, a5, a3 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vse64.v v8, (a1), v0.t ; CHECK-NEXT: bltu a5, a0, .LBB31_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a7, a0 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB31_6: ; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: srli a4, a3, 3 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, mu -; CHECK-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-NEXT: slli a4, a3, 3 -; CHECK-NEXT: add a4, a1, a4 -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, mu -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vse64.v v8, (a4), v0.t -; CHECK-NEXT: srli a7, a3, 2 -; CHECK-NEXT: sub a4, a2, a6 -; CHECK-NEXT: slli a5, a3, 4 -; CHECK-NEXT: bltu a2, a4, .LBB31_8 +; CHECK-NEXT: srli a5, a3, 3 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v0, v24, a5 +; CHECK-NEXT: slli a5, a3, 3 +; CHECK-NEXT: add a5, a1, a5 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, mu +; CHECK-NEXT: addi a6, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vse64.v v8, (a5), v0.t +; CHECK-NEXT: srli a5, a3, 2 +; CHECK-NEXT: sub a6, 
a2, a4 +; CHECK-NEXT: slli a4, a3, 4 +; CHECK-NEXT: bltu a2, a6, .LBB31_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: mv a0, a6 ; CHECK-NEXT: .LBB31_8: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vx v0, v24, a7 -; CHECK-NEXT: add a1, a1, a5 +; CHECK-NEXT: vslidedown.vx v0, v24, a5 +; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: bltu a0, a3, .LBB31_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: mv a0, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1185,7 +1185,7 @@ ; RV64-LABEL: vpreduce_umax_nxv32i32: ; RV64: # %bb.0: ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: srli a6, a3, 2 +; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: slli a4, a0, 32 ; RV64-NEXT: slli a0, a3, 1 ; RV64-NEXT: srli a3, a4, 32 @@ -1195,8 +1195,8 @@ ; RV64-NEXT: mv a4, a0 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: li a5, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, mu -; RV64-NEXT: vslidedown.vx v24, v0, a6 +; RV64-NEXT: vsetvli a6, zero, e8, mf2, ta, mu +; RV64-NEXT: vslidedown.vx v24, v0, a2 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu ; RV64-NEXT: vmv.s.x v25, a3 ; RV64-NEXT: vsetvli zero, a4, e32, m8, tu, mu diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll @@ -160,12 +160,12 @@ ; RV32IZbbZbt-NEXT: add a0, a4, a0 ; RV32IZbbZbt-NEXT: srai a4, a0, 31 ; RV32IZbbZbt-NEXT: lui a5, 524288 -; RV32IZbbZbt-NEXT: xor a6, a4, a5 -; RV32IZbbZbt-NEXT: xor a5, a1, a0 +; RV32IZbbZbt-NEXT: xor a5, a4, a5 +; RV32IZbbZbt-NEXT: xor a6, a1, a0 ; RV32IZbbZbt-NEXT: xor a1, a1, a3 -; RV32IZbbZbt-NEXT: andn a1, a5, a1 +; RV32IZbbZbt-NEXT: andn a1, a6, a1 ; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a0 +; RV32IZbbZbt-NEXT: cmov a1, a3, a5, a0 ; RV32IZbbZbt-NEXT: 
cmov a0, a3, a4, a2 ; RV32IZbbZbt-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -168,13 +168,13 @@ ; RV32IZbbZbt-NEXT: add a0, a2, a0 ; RV32IZbbZbt-NEXT: srai a2, a0, 31 ; RV32IZbbZbt-NEXT: lui a4, 524288 -; RV32IZbbZbt-NEXT: xor a6, a2, a4 -; RV32IZbbZbt-NEXT: xor a4, a1, a0 +; RV32IZbbZbt-NEXT: xor a4, a2, a4 +; RV32IZbbZbt-NEXT: xor a6, a1, a0 ; RV32IZbbZbt-NEXT: xor a1, a1, a5 -; RV32IZbbZbt-NEXT: andn a1, a4, a1 -; RV32IZbbZbt-NEXT: slti a4, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a4, a6, a0 -; RV32IZbbZbt-NEXT: cmov a0, a4, a2, a3 +; RV32IZbbZbt-NEXT: andn a1, a6, a1 +; RV32IZbbZbt-NEXT: slti a5, a1, 0 +; RV32IZbbZbt-NEXT: cmov a1, a5, a4, a0 +; RV32IZbbZbt-NEXT: cmov a0, a5, a2, a3 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func64: diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll @@ -65,20 +65,20 @@ ; RV32I-NEXT: .LBB1_2: # %entry ; RV32I-NEXT: beqz a1, .LBB1_5 ; RV32I-NEXT: # %bb.3: # %entry -; RV32I-NEXT: addi a7, a4, 4 +; RV32I-NEXT: addi a5, a4, 4 ; RV32I-NEXT: bnez a1, .LBB1_6 ; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: addi a5, a3, 8 +; RV32I-NEXT: addi a6, a3, 8 ; RV32I-NEXT: j .LBB1_7 ; RV32I-NEXT: .LBB1_5: -; RV32I-NEXT: addi a7, a3, 4 +; RV32I-NEXT: addi a5, a3, 4 ; RV32I-NEXT: beqz a1, .LBB1_4 ; RV32I-NEXT: .LBB1_6: # %entry -; RV32I-NEXT: addi a5, a4, 8 +; RV32I-NEXT: addi a6, a4, 8 ; RV32I-NEXT: .LBB1_7: # %entry -; RV32I-NEXT: lw a6, 0(a2) -; RV32I-NEXT: lw a7, 0(a7) -; RV32I-NEXT: lw a2, 0(a5) +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a5, 0(a5) +; RV32I-NEXT: lw a6, 0(a6) ; RV32I-NEXT: beqz a1, .LBB1_9 ; RV32I-NEXT: # %bb.8: # %entry ; RV32I-NEXT: addi a1, a4, 12 @@ -88,25 +88,25 @@ ; RV32I-NEXT: .LBB1_10: 
# %entry ; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: sw a6, 0(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: ret ; ; RV32IBT-LABEL: cmovcc128: ; RV32IBT: # %bb.0: # %entry -; RV32IBT-NEXT: addi a6, a3, 12 -; RV32IBT-NEXT: addi a7, a4, 12 -; RV32IBT-NEXT: addi t0, a3, 8 -; RV32IBT-NEXT: addi t1, a4, 8 -; RV32IBT-NEXT: addi t2, a3, 4 -; RV32IBT-NEXT: addi a5, a4, 4 +; RV32IBT-NEXT: addi a5, a3, 12 +; RV32IBT-NEXT: addi a6, a4, 12 +; RV32IBT-NEXT: addi a7, a3, 8 +; RV32IBT-NEXT: addi t0, a4, 8 +; RV32IBT-NEXT: addi t1, a3, 4 +; RV32IBT-NEXT: addi t2, a4, 4 ; RV32IBT-NEXT: xori a1, a1, 123 ; RV32IBT-NEXT: or a1, a1, a2 ; RV32IBT-NEXT: cmov a2, a1, a4, a3 -; RV32IBT-NEXT: cmov a3, a1, a5, t2 -; RV32IBT-NEXT: cmov a4, a1, t1, t0 -; RV32IBT-NEXT: cmov a1, a1, a7, a6 +; RV32IBT-NEXT: cmov a3, a1, t2, t1 +; RV32IBT-NEXT: cmov a4, a1, t0, a7 +; RV32IBT-NEXT: cmov a1, a1, a6, a5 ; RV32IBT-NEXT: lw a1, 0(a1) ; RV32IBT-NEXT: lw a4, 0(a4) ; RV32IBT-NEXT: lw a3, 0(a3) @@ -192,20 +192,20 @@ ; RV32I-NEXT: .LBB3_2: # %entry ; RV32I-NEXT: bnez a1, .LBB3_5 ; RV32I-NEXT: # %bb.3: # %entry -; RV32I-NEXT: addi a7, a3, 4 +; RV32I-NEXT: addi a5, a3, 4 ; RV32I-NEXT: beqz a1, .LBB3_6 ; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: addi a5, a2, 8 +; RV32I-NEXT: addi a6, a2, 8 ; RV32I-NEXT: j .LBB3_7 ; RV32I-NEXT: .LBB3_5: -; RV32I-NEXT: addi a7, a2, 4 +; RV32I-NEXT: addi a5, a2, 4 ; RV32I-NEXT: bnez a1, .LBB3_4 ; RV32I-NEXT: .LBB3_6: # %entry -; RV32I-NEXT: addi a5, a3, 8 +; RV32I-NEXT: addi a6, a3, 8 ; RV32I-NEXT: .LBB3_7: # %entry -; RV32I-NEXT: lw a6, 0(a4) -; RV32I-NEXT: lw a7, 0(a7) -; RV32I-NEXT: lw a4, 0(a5) +; RV32I-NEXT: lw a4, 0(a4) +; RV32I-NEXT: lw a5, 0(a5) +; RV32I-NEXT: lw a6, 0(a6) ; RV32I-NEXT: bnez a1, .LBB3_9 ; RV32I-NEXT: # %bb.8: # %entry ; RV32I-NEXT: addi a1, a3, 12 @@ -215,26 +215,26 @@ ; RV32I-NEXT: .LBB3_10: # %entry ; RV32I-NEXT: lw a1, 
0(a1) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: sw a6, 0(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: ret ; ; RV32IBT-LABEL: cmov128: ; RV32IBT: # %bb.0: # %entry -; RV32IBT-NEXT: addi a6, a3, 12 -; RV32IBT-NEXT: addi a7, a2, 12 -; RV32IBT-NEXT: addi t0, a3, 8 -; RV32IBT-NEXT: addi t1, a2, 8 -; RV32IBT-NEXT: addi a4, a3, 4 -; RV32IBT-NEXT: addi a5, a2, 4 +; RV32IBT-NEXT: addi a4, a3, 12 +; RV32IBT-NEXT: addi a5, a2, 12 +; RV32IBT-NEXT: addi a6, a3, 8 +; RV32IBT-NEXT: addi a7, a2, 8 +; RV32IBT-NEXT: addi t0, a3, 4 +; RV32IBT-NEXT: addi t1, a2, 4 ; RV32IBT-NEXT: andi a1, a1, 1 ; RV32IBT-NEXT: cmov a2, a1, a2, a3 -; RV32IBT-NEXT: cmov a3, a1, a5, a4 -; RV32IBT-NEXT: cmov a4, a1, t1, t0 -; RV32IBT-NEXT: cmov a1, a1, a7, a6 +; RV32IBT-NEXT: cmov a3, a1, t1, t0 +; RV32IBT-NEXT: cmov a6, a1, a7, a6 +; RV32IBT-NEXT: cmov a1, a1, a5, a4 ; RV32IBT-NEXT: lw a1, 0(a1) -; RV32IBT-NEXT: lw a4, 0(a4) +; RV32IBT-NEXT: lw a4, 0(a6) ; RV32IBT-NEXT: lw a3, 0(a3) ; RV32IBT-NEXT: lw a2, 0(a2) ; RV32IBT-NEXT: sw a1, 12(a0) @@ -476,17 +476,17 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV32I-LABEL: cmovdiffcc: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: andi a1, a1, 1 -; RV32I-NEXT: beqz a0, .LBB7_3 +; RV32I-NEXT: andi a6, a0, 1 +; RV32I-NEXT: andi a0, a1, 1 +; RV32I-NEXT: beqz a6, .LBB7_3 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: beqz a1, .LBB7_4 +; RV32I-NEXT: beqz a0, .LBB7_4 ; RV32I-NEXT: .LBB7_2: # %entry ; RV32I-NEXT: add a0, a2, a4 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB7_3: # %entry ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bnez a1, .LBB7_2 +; RV32I-NEXT: bnez a0, .LBB7_2 ; RV32I-NEXT: .LBB7_4: # %entry ; RV32I-NEXT: mv a4, a5 ; RV32I-NEXT: add a0, a2, a4 @@ -503,17 +503,17 @@ ; ; RV64I-LABEL: cmovdiffcc: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: andi a1, a1, 1 -; 
RV64I-NEXT: beqz a0, .LBB7_3 +; RV64I-NEXT: andi a6, a0, 1 +; RV64I-NEXT: andi a0, a1, 1 +; RV64I-NEXT: beqz a6, .LBB7_3 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: beqz a1, .LBB7_4 +; RV64I-NEXT: beqz a0, .LBB7_4 ; RV64I-NEXT: .LBB7_2: # %entry ; RV64I-NEXT: addw a0, a2, a4 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB7_3: # %entry ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bnez a1, .LBB7_2 +; RV64I-NEXT: bnez a0, .LBB7_2 ; RV64I-NEXT: .LBB7_4: # %entry ; RV64I-NEXT: mv a4, a5 ; RV64I-NEXT: addw a0, a2, a4 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -179,19 +179,19 @@ ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sraw a0, a0, a1 ; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: addiw s2, a1, 1365 +; RV64I-NEXT: addiw s0, a1, 1365 ; RV64I-NEXT: lui a1, 209715 ; RV64I-NEXT: addiw s1, a1, 819 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw s3, a1, -241 +; RV64I-NEXT: addiw s2, a1, -241 ; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s0, a1, 257 +; RV64I-NEXT: addiw s3, a1, 257 ; RV64I-NEXT: .LBB4_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call bar@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: subw a0, a1, a0 ; RV64I-NEXT: and a2, a0, s1 ; RV64I-NEXT: srli a0, a0, 2 @@ -199,8 +199,8 @@ ; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: add a0, a0, a2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: mulw a0, a0, s0 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: mulw a0, a0, s3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: bnez a1, .LBB4_1 ; RV64I-NEXT: # %bb.2: # %bb7 diff --git a/llvm/test/CodeGen/RISCV/shadowcallstack.ll b/llvm/test/CodeGen/RISCV/shadowcallstack.ll --- a/llvm/test/CodeGen/RISCV/shadowcallstack.ll +++ b/llvm/test/CodeGen/RISCV/shadowcallstack.ll @@ -82,14 +82,14 @@ ; 
RV32-NEXT: .cfi_offset s1, -12 ; RV32-NEXT: .cfi_offset s3, -16 ; RV32-NEXT: call bar@plt -; RV32-NEXT: mv s3, a0 +; RV32-NEXT: mv s0, a0 ; RV32-NEXT: call bar@plt ; RV32-NEXT: mv s1, a0 ; RV32-NEXT: call bar@plt -; RV32-NEXT: mv s0, a0 +; RV32-NEXT: mv s3, a0 ; RV32-NEXT: call bar@plt -; RV32-NEXT: add a1, s3, s1 -; RV32-NEXT: add a0, s0, a0 +; RV32-NEXT: add a1, s0, s1 +; RV32-NEXT: add a0, s3, a0 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -115,14 +115,14 @@ ; RV64-NEXT: .cfi_offset s1, -24 ; RV64-NEXT: .cfi_offset s3, -32 ; RV64-NEXT: call bar@plt -; RV64-NEXT: mv s3, a0 +; RV64-NEXT: mv s0, a0 ; RV64-NEXT: call bar@plt ; RV64-NEXT: mv s1, a0 ; RV64-NEXT: call bar@plt -; RV64-NEXT: mv s0, a0 +; RV64-NEXT: mv s3, a0 ; RV64-NEXT: call bar@plt -; RV64-NEXT: addw a1, s3, s1 -; RV64-NEXT: addw a0, s0, a0 +; RV64-NEXT: addw a1, s0, s1 +; RV64-NEXT: addw a0, s3, a0 ; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -148,120 +148,116 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t4, 12(a1) -; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: li a6, 32 ; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t5, a3, a2 ; RV32I-NEXT: li t2, 31 -; RV32I-NEXT: bltz t5, .LBB6_2 +; RV32I-NEXT: bltz t1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a3, t0, t5 +; RV32I-NEXT: sll t6, a5, t1 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: 
-; RV32I-NEXT: sll a3, t4, t1 -; RV32I-NEXT: sub a4, t2, t1 -; RV32I-NEXT: srli a5, t0, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: sll a6, a4, t0 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: srli t3, a5, 1 +; RV32I-NEXT: srl a7, t3, a7 +; RV32I-NEXT: or t6, a6, a7 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw a5, 4(a1) -; RV32I-NEXT: addi t6, a2, -32 -; RV32I-NEXT: bgez t6, .LBB6_5 +; RV32I-NEXT: lw t5, 4(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB6_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a5, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: srl a7, t5, a2 +; RV32I-NEXT: or t6, t6, a7 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: addi a4, a2, -96 +; RV32I-NEXT: addi t4, a2, -96 ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz a4, .LBB6_7 +; RV32I-NEXT: bltz t4, .LBB6_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB6_8 +; RV32I-NEXT: bgeu a2, a3, .LBB6_8 ; RV32I-NEXT: j .LBB6_9 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a7, t4, t3 -; RV32I-NEXT: bltu a2, a6, .LBB6_9 +; RV32I-NEXT: srl a7, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB6_9 ; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv t6, a7 ; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: beqz a2, .LBB6_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: lw s0, 0(a1) +; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sub t2, t2, a2 -; RV32I-NEXT: bltz t6, .LBB6_13 +; RV32I-NEXT: bltz a6, .LBB6_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl a5, a5, t6 -; RV32I-NEXT: bltz t5, .LBB6_14 +; RV32I-NEXT: srl t5, t5, a6 +; RV32I-NEXT: bltz t1, .LBB6_14 ; RV32I-NEXT: j .LBB6_15 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl a3, s0, a2 -; RV32I-NEXT: slli a5, a5, 1 -; RV32I-NEXT: sll a5, a5, t2 -; RV32I-NEXT: or a5, a3, a5 -; RV32I-NEXT: bgez t5, .LBB6_15 +; RV32I-NEXT: srl t6, a1, a2 +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t5, t5, t2 +; 
RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: bgez t1, .LBB6_15 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: sll a3, t0, t1 -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: sll t0, a5, t0 +; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: slli a3, t4, 1 -; RV32I-NEXT: bltz a4, .LBB6_17 +; RV32I-NEXT: slli t0, a4, 1 +; RV32I-NEXT: bltz t4, .LBB6_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: srl a4, t4, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB6_18 +; RV32I-NEXT: srl t1, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB6_18 ; RV32I-NEXT: j .LBB6_19 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sll a4, a3, a4 -; RV32I-NEXT: srl a1, t0, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB6_19 +; RV32I-NEXT: li t1, 95 +; RV32I-NEXT: sub t1, t1, a2 +; RV32I-NEXT: sll t1, t0, t1 +; RV32I-NEXT: srl t3, a5, t3 +; RV32I-NEXT: or t1, t3, t1 +; RV32I-NEXT: bltu a2, a3, .LBB6_19 ; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t5, t1 ; RV32I-NEXT: .LBB6_19: ; RV32I-NEXT: bnez a2, .LBB6_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz t6, .LBB6_23 +; RV32I-NEXT: bltz a6, .LBB6_23 ; RV32I-NEXT: .LBB6_21: -; RV32I-NEXT: srl a3, t4, t6 -; RV32I-NEXT: bgeu a2, a6, .LBB6_24 +; RV32I-NEXT: srl a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB6_24 ; RV32I-NEXT: j .LBB6_25 ; RV32I-NEXT: .LBB6_22: -; RV32I-NEXT: mv s0, a5 -; RV32I-NEXT: bgez t6, .LBB6_21 +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bgez a6, .LBB6_21 ; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: srl a1, t0, a2 -; RV32I-NEXT: sll a3, a3, t2 -; RV32I-NEXT: or a3, a1, a3 -; RV32I-NEXT: bltu a2, a6, .LBB6_25 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: bltu a2, a3, .LBB6_25 ; RV32I-NEXT: .LBB6_24: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: .LBB6_25: -; RV32I-NEXT: bltz t6, .LBB6_27 +; RV32I-NEXT: bltz a6, .LBB6_27 ; RV32I-NEXT: # %bb.26: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB6_28 +; RV32I-NEXT: 
bgeu a2, a3, .LBB6_28 ; RV32I-NEXT: j .LBB6_29 ; RV32I-NEXT: .LBB6_27: -; RV32I-NEXT: srl a4, t4, a2 -; RV32I-NEXT: bltu a2, a6, .LBB6_29 +; RV32I-NEXT: srl a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB6_29 ; RV32I-NEXT: .LBB6_28: ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: .LBB6_29: ; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a3, 8(a0) -; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -290,120 +286,118 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t2, 8(a1) -; RV32I-NEXT: lw t5, 12(a1) -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t6, a3, a2 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t1, a3, a2 +; RV32I-NEXT: li a6, 32 +; RV32I-NEXT: sub t2, a6, a2 ; RV32I-NEXT: li t4, 31 -; RV32I-NEXT: bltz t6, .LBB7_2 +; RV32I-NEXT: bltz t2, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll s0, t2, t6 +; RV32I-NEXT: sll s0, a5, t2 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a3, t5, t1 -; RV32I-NEXT: sub a4, t4, t1 -; RV32I-NEXT: srli a5, t2, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or s0, a3, a4 +; RV32I-NEXT: sll a6, a4, t1 +; RV32I-NEXT: sub a7, t4, t1 +; RV32I-NEXT: srli t0, a5, 1 +; RV32I-NEXT: srl a7, t0, a7 +; RV32I-NEXT: or s0, a6, a7 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: lw a5, 4(a1) -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: bgez a3, .LBB7_5 +; RV32I-NEXT: lw t6, 4(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB7_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a5, a2 -; RV32I-NEXT: or s0, s0, a4 +; RV32I-NEXT: srl a7, t6, a2 +; RV32I-NEXT: or s0, s0, a7 ; RV32I-NEXT: .LBB7_5: ; 
RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi a4, a2, -96 -; RV32I-NEXT: srai a7, t5, 31 -; RV32I-NEXT: bltz a4, .LBB7_7 +; RV32I-NEXT: addi t5, a2, -96 +; RV32I-NEXT: srai a7, a4, 31 +; RV32I-NEXT: bltz t5, .LBB7_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: bgeu a2, a6, .LBB7_8 +; RV32I-NEXT: bgeu a2, a3, .LBB7_8 ; RV32I-NEXT: j .LBB7_9 ; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: sra t0, t5, t3 -; RV32I-NEXT: bltu a2, a6, .LBB7_9 +; RV32I-NEXT: sra t0, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB7_9 ; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: mv s0, t0 ; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: beqz a2, .LBB7_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: .LBB7_11: -; RV32I-NEXT: lw s1, 0(a1) +; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sub t4, t4, a2 -; RV32I-NEXT: bltz a3, .LBB7_13 +; RV32I-NEXT: bltz a6, .LBB7_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl a5, a5, a3 -; RV32I-NEXT: bltz t6, .LBB7_14 +; RV32I-NEXT: srl t6, t6, a6 +; RV32I-NEXT: bltz t2, .LBB7_14 ; RV32I-NEXT: j .LBB7_15 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl s0, s1, a2 -; RV32I-NEXT: slli a5, a5, 1 -; RV32I-NEXT: sll a5, a5, t4 -; RV32I-NEXT: or a5, s0, a5 -; RV32I-NEXT: bgez t6, .LBB7_15 +; RV32I-NEXT: srl s0, a1, a2 +; RV32I-NEXT: slli t6, t6, 1 +; RV32I-NEXT: sll t6, t6, t4 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: bgez t2, .LBB7_15 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sll s0, t2, t1 -; RV32I-NEXT: or a5, a5, s0 +; RV32I-NEXT: sll t1, a5, t1 +; RV32I-NEXT: or t6, t6, t1 ; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: slli s0, t5, 1 -; RV32I-NEXT: bltz a4, .LBB7_17 +; RV32I-NEXT: slli t1, a4, 1 +; RV32I-NEXT: bltz t5, .LBB7_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sra a4, t5, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB7_18 +; RV32I-NEXT: sra t2, a4, t5 +; RV32I-NEXT: bgeu a2, a3, .LBB7_18 ; RV32I-NEXT: j .LBB7_19 ; RV32I-NEXT: .LBB7_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sll a4, s0, a4 -; RV32I-NEXT: srl 
a1, t2, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB7_19 +; RV32I-NEXT: li t2, 95 +; RV32I-NEXT: sub t2, t2, a2 +; RV32I-NEXT: sll t2, t1, t2 +; RV32I-NEXT: srl t3, a5, t3 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: bltu a2, a3, .LBB7_19 ; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t6, t2 ; RV32I-NEXT: .LBB7_19: ; RV32I-NEXT: bnez a2, .LBB7_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a3, .LBB7_23 +; RV32I-NEXT: bltz a6, .LBB7_23 ; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: sra a4, t5, a3 -; RV32I-NEXT: bgeu a2, a6, .LBB7_24 +; RV32I-NEXT: sra a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB7_24 ; RV32I-NEXT: j .LBB7_25 ; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: bgez a3, .LBB7_21 +; RV32I-NEXT: mv a1, t6 +; RV32I-NEXT: bgez a6, .LBB7_21 ; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: srl a1, t2, a2 -; RV32I-NEXT: sll a4, s0, t4 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB7_25 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: sll t1, t1, t4 +; RV32I-NEXT: or a5, a5, t1 +; RV32I-NEXT: bltu a2, a3, .LBB7_25 ; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: bltz a3, .LBB7_27 +; RV32I-NEXT: bltz a6, .LBB7_27 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: bgeu a2, a6, .LBB7_28 +; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: bgeu a2, a3, .LBB7_28 ; RV32I-NEXT: j .LBB7_29 ; RV32I-NEXT: .LBB7_27: -; RV32I-NEXT: sra a3, t5, a2 -; RV32I-NEXT: bltu a2, a6, .LBB7_29 +; RV32I-NEXT: sra a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB7_29 ; RV32I-NEXT: .LBB7_28: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB7_29: -; RV32I-NEXT: sw a3, 12(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw s1, 0(a0) +; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw t0, 4(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 
; RV32I-NEXT: ret ; @@ -431,120 +425,116 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: lw t4, 0(a1) -; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: li a6, 32 ; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t5, a3, a2 ; RV32I-NEXT: li t2, 31 -; RV32I-NEXT: bltz t5, .LBB8_2 +; RV32I-NEXT: bltz t1, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a3, t0, t5 +; RV32I-NEXT: srl t6, a5, t1 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a3, t4, t1 -; RV32I-NEXT: sub a4, t2, t1 -; RV32I-NEXT: slli a5, t0, 1 -; RV32I-NEXT: sll a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: srl a6, a4, t0 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: slli t3, a5, 1 +; RV32I-NEXT: sll a7, t3, a7 +; RV32I-NEXT: or t6, a6, a7 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: addi t6, a2, -32 -; RV32I-NEXT: bgez t6, .LBB8_5 +; RV32I-NEXT: lw t5, 8(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB8_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a4, a5, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: sll a7, t5, a2 +; RV32I-NEXT: or t6, t6, a7 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: addi a4, a2, -96 +; RV32I-NEXT: addi t4, a2, -96 ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz a4, .LBB8_7 +; RV32I-NEXT: bltz t4, .LBB8_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB8_8 +; RV32I-NEXT: bgeu a2, a3, .LBB8_8 ; RV32I-NEXT: j .LBB8_9 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: sll a7, t4, t3 -; RV32I-NEXT: bltu a2, a6, .LBB8_9 +; RV32I-NEXT: sll a7, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB8_9 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv t6, a7 ; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: 
mv a7, a5 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: beqz a2, .LBB8_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: .LBB8_11: -; RV32I-NEXT: lw s0, 12(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: sub t2, t2, a2 -; RV32I-NEXT: bltz t6, .LBB8_13 +; RV32I-NEXT: bltz a6, .LBB8_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sll a5, a5, t6 -; RV32I-NEXT: bltz t5, .LBB8_14 +; RV32I-NEXT: sll t5, t5, a6 +; RV32I-NEXT: bltz t1, .LBB8_14 ; RV32I-NEXT: j .LBB8_15 ; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll a3, s0, a2 -; RV32I-NEXT: srli a5, a5, 1 -; RV32I-NEXT: srl a5, a5, t2 -; RV32I-NEXT: or a5, a3, a5 -; RV32I-NEXT: bgez t5, .LBB8_15 +; RV32I-NEXT: sll t6, a1, a2 +; RV32I-NEXT: srli t5, t5, 1 +; RV32I-NEXT: srl t5, t5, t2 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: bgez t1, .LBB8_15 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: srl a3, t0, t1 -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: srl t0, a5, t0 +; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: srli a3, t4, 1 -; RV32I-NEXT: bltz a4, .LBB8_17 +; RV32I-NEXT: srli t0, a4, 1 +; RV32I-NEXT: bltz t4, .LBB8_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll a4, t4, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB8_18 +; RV32I-NEXT: sll t1, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB8_18 ; RV32I-NEXT: j .LBB8_19 ; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: srl a4, a3, a4 -; RV32I-NEXT: sll a1, t0, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB8_19 +; RV32I-NEXT: li t1, 95 +; RV32I-NEXT: sub t1, t1, a2 +; RV32I-NEXT: srl t1, t0, t1 +; RV32I-NEXT: sll t3, a5, t3 +; RV32I-NEXT: or t1, t3, t1 +; RV32I-NEXT: bltu a2, a3, .LBB8_19 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t5, t1 ; RV32I-NEXT: .LBB8_19: ; RV32I-NEXT: bnez a2, .LBB8_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz t6, .LBB8_23 +; RV32I-NEXT: bltz a6, .LBB8_23 ; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: sll a3, t4, t6 -; RV32I-NEXT: bgeu a2, a6, .LBB8_24 +; 
RV32I-NEXT: sll a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB8_24 ; RV32I-NEXT: j .LBB8_25 ; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: mv s0, a5 -; RV32I-NEXT: bgez t6, .LBB8_21 +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bgez a6, .LBB8_21 ; RV32I-NEXT: .LBB8_23: -; RV32I-NEXT: sll a1, t0, a2 -; RV32I-NEXT: srl a3, a3, t2 -; RV32I-NEXT: or a3, a1, a3 -; RV32I-NEXT: bltu a2, a6, .LBB8_25 +; RV32I-NEXT: sll a5, a5, a2 +; RV32I-NEXT: srl t0, t0, t2 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: bltu a2, a3, .LBB8_25 ; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: bltz t6, .LBB8_27 +; RV32I-NEXT: bltz a6, .LBB8_27 ; RV32I-NEXT: # %bb.26: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB8_28 +; RV32I-NEXT: bgeu a2, a3, .LBB8_28 ; RV32I-NEXT: j .LBB8_29 ; RV32I-NEXT: .LBB8_27: -; RV32I-NEXT: sll a4, t4, a2 -; RV32I-NEXT: bltu a2, a6, .LBB8_29 +; RV32I-NEXT: sll a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB8_29 ; RV32I-NEXT: .LBB8_28: ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: .LBB8_29: ; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw s0, 12(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: sw a7, 8(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: @@ -606,69 +596,69 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: lw t2, 8(a1) -; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw t2, 0(a1) ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: andi a1, a2, 64 -; RV32I-NEXT: mv a5, a7 -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: beqz a1, .LBB10_2 +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: andi t1, a2, 64 +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: mv a4, t2 +; RV32I-NEXT: beqz t1, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a5, t1 -; RV32I-NEXT: mv a6, t2 
+; RV32I-NEXT: mv t0, a1 +; RV32I-NEXT: mv a4, a3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: andi a4, a2, 32 -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: bnez a4, .LBB10_13 +; RV32I-NEXT: andi a6, a2, 32 +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: bnez a6, .LBB10_13 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: bnez a1, .LBB10_14 +; RV32I-NEXT: bnez t1, .LBB10_14 ; RV32I-NEXT: .LBB10_4: -; RV32I-NEXT: beqz a4, .LBB10_6 +; RV32I-NEXT: beqz a6, .LBB10_6 ; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: slli t3, a5, 1 -; RV32I-NEXT: not a3, a2 -; RV32I-NEXT: beqz a1, .LBB10_8 +; RV32I-NEXT: slli t3, t0, 1 +; RV32I-NEXT: not t2, a2 +; RV32I-NEXT: beqz t1, .LBB10_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: mv a1, a7 ; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: srl a7, t0, a2 -; RV32I-NEXT: sll a1, t3, a3 -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: beqz a4, .LBB10_10 +; RV32I-NEXT: srl a7, a5, a2 +; RV32I-NEXT: sll t1, t3, t2 +; RV32I-NEXT: srl t0, t0, a2 +; RV32I-NEXT: beqz a6, .LBB10_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t2, t1 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB10_10: -; RV32I-NEXT: or a7, a1, a7 -; RV32I-NEXT: slli a1, t2, 1 -; RV32I-NEXT: sll a1, a1, a3 -; RV32I-NEXT: or a5, a1, a5 -; RV32I-NEXT: srl a1, t2, a2 -; RV32I-NEXT: beqz a4, .LBB10_12 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: slli t1, a3, 1 +; RV32I-NEXT: sll t1, t1, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: srl a3, a3, a2 +; RV32I-NEXT: beqz a6, .LBB10_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: slli a4, t1, 1 -; RV32I-NEXT: sll a4, a4, a3 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: srl a2, t1, a2 -; RV32I-NEXT: slli a4, t0, 1 -; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: or a2, a3, a2 -; RV32I-NEXT: sw a2, 12(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: slli a4, a1, 1 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: or a3, a4, a3 
+; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slli a2, a5, 1 +; RV32I-NEXT: sll a2, a2, t2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw t0, 4(a0) ; RV32I-NEXT: sw a7, 0(a0) ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: beqz a1, .LBB10_4 +; RV32I-NEXT: mv a5, t0 +; RV32I-NEXT: beqz t1, .LBB10_4 ; RV32I-NEXT: .LBB10_14: -; RV32I-NEXT: mv t2, a3 -; RV32I-NEXT: bnez a4, .LBB10_5 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bnez a6, .LBB10_5 ; RV32I-NEXT: j .LBB10_6 ; ; RV64I-LABEL: fshr128_minsize: diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -306,13 +306,13 @@ ; RV32-NEXT: lbu a1, 12(s0) ; RV32-NEXT: lw a2, 8(s0) ; RV32-NEXT: andi a3, a0, 1 -; RV32-NEXT: neg s2, a3 +; RV32-NEXT: neg s1, a3 ; RV32-NEXT: slli a3, a1, 30 ; RV32-NEXT: srli a4, a2, 2 -; RV32-NEXT: or s3, a4, a3 +; RV32-NEXT: or s2, a4, a3 ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: neg s1, a1 +; RV32-NEXT: neg s3, a1 ; RV32-NEXT: slli a1, a2, 31 ; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 @@ -327,17 +327,17 @@ ; RV32-NEXT: mv s6, a1 ; RV32-NEXT: li a2, -5 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: mv a0, s3 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: mv a1, s3 ; RV32-NEXT: call __moddi3@plt -; RV32-NEXT: mv s1, a0 +; RV32-NEXT: mv s2, a0 ; RV32-NEXT: mv s3, a1 ; RV32-NEXT: li a2, 6 ; RV32-NEXT: mv a0, s4 -; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a1, s1 ; RV32-NEXT: li a3, 0 ; RV32-NEXT: call __moddi3@plt -; RV32-NEXT: xori a2, s1, 2 +; RV32-NEXT: xori a2, s2, 2 ; RV32-NEXT: or a2, a2, s3 ; RV32-NEXT: snez a2, a2 ; RV32-NEXT: xori a3, s5, 1 @@ -460,13 +460,13 @@ ; RV32M-NEXT: lbu a1, 12(s0) ; RV32M-NEXT: lw a2, 8(s0) ; RV32M-NEXT: andi a3, a0, 1 -; RV32M-NEXT: neg s2, a3 +; 
RV32M-NEXT: neg s1, a3 ; RV32M-NEXT: slli a3, a1, 30 ; RV32M-NEXT: srli a4, a2, 2 -; RV32M-NEXT: or s3, a4, a3 +; RV32M-NEXT: or s2, a4, a3 ; RV32M-NEXT: srli a1, a1, 2 ; RV32M-NEXT: andi a1, a1, 1 -; RV32M-NEXT: neg s1, a1 +; RV32M-NEXT: neg s3, a1 ; RV32M-NEXT: slli a1, a2, 31 ; RV32M-NEXT: srli a0, a0, 1 ; RV32M-NEXT: or a0, a0, a1 @@ -481,17 +481,17 @@ ; RV32M-NEXT: mv s6, a1 ; RV32M-NEXT: li a2, -5 ; RV32M-NEXT: li a3, -1 -; RV32M-NEXT: mv a0, s3 -; RV32M-NEXT: mv a1, s1 +; RV32M-NEXT: mv a0, s2 +; RV32M-NEXT: mv a1, s3 ; RV32M-NEXT: call __moddi3@plt -; RV32M-NEXT: mv s1, a0 +; RV32M-NEXT: mv s2, a0 ; RV32M-NEXT: mv s3, a1 ; RV32M-NEXT: li a2, 6 ; RV32M-NEXT: mv a0, s4 -; RV32M-NEXT: mv a1, s2 +; RV32M-NEXT: mv a1, s1 ; RV32M-NEXT: li a3, 0 ; RV32M-NEXT: call __moddi3@plt -; RV32M-NEXT: xori a2, s1, 2 +; RV32M-NEXT: xori a2, s2, 2 ; RV32M-NEXT: or a2, a2, s3 ; RV32M-NEXT: snez a2, a2 ; RV32M-NEXT: xori a3, s5, 1 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -18,44 +18,42 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: lh s3, 8(a1) -; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s0, 12(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, -124 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 98 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s0, a0 +; 
RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, -1003 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_srem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a2, 12(a1) ; RV32IM-NEXT: lh a3, 8(a1) ; RV32IM-NEXT: lh a4, 0(a1) ; RV32IM-NEXT: lh a1, 4(a1) @@ -63,88 +61,86 @@ ; RV32IM-NEXT: addi a5, a5, 389 ; RV32IM-NEXT: mulh a5, a4, a5 ; RV32IM-NEXT: add a5, a5, a4 -; RV32IM-NEXT: srli a2, a5, 31 +; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 -; RV32IM-NEXT: lui a4, 507375 -; RV32IM-NEXT: addi a4, a4, 1981 -; RV32IM-NEXT: mulh a4, a1, a4 -; RV32IM-NEXT: sub a4, a4, a1 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, -124 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 -; RV32IM-NEXT: lui a4, 342392 -; RV32IM-NEXT: addi a4, a4, 669 -; RV32IM-NEXT: mulh a4, a3, a4 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srli a4, a4, 5 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, 98 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: lui a4, 780943 -; RV32IM-NEXT: addi a4, a4, 1809 -; RV32IM-NEXT: mulh a4, a6, a4 -; RV32IM-NEXT: srli a5, a4, 
31 -; RV32IM-NEXT: srli a4, a4, 8 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, -1003 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a4, a6, a4 -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: lui a5, 507375 +; RV32IM-NEXT: addi a5, a5, 1981 +; RV32IM-NEXT: mulh a5, a1, a5 +; RV32IM-NEXT: sub a5, a5, a1 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, -124 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: lui a5, 342392 +; RV32IM-NEXT: addi a5, a5, 669 +; RV32IM-NEXT: mulh a5, a3, a5 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 5 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 98 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: lui a5, 780943 +; RV32IM-NEXT: addi a5, a5, 1809 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 8 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, -1003 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_1: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: lh s3, 16(a1) -; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill 
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s0, 24(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, -124 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 98 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, -1003 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_srem_vec_1: @@ -152,45 +148,45 @@ ; RV64IM-NEXT: lh a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) ; 
RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a7, 16(a1) +; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: lh a5, 16(a1) ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6) +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a1, a4 +; RV64IM-NEXT: mulh a3, a1, a6 ; RV64IM-NEXT: sub a3, a3, a1 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) -; RV64IM-NEXT: li a5, -124 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6) +; RV64IM-NEXT: li a7, -124 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a7, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a5, a6 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 5 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) -; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a3, a3, a5 -; RV64IM-NEXT: subw a3, a7, a3 -; RV64IM-NEXT: mulh a4, a6, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, -1003 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) +; RV64IM-NEXT: li a7, 98 +; 
RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: mulh a5, a4, a6 +; RV64IM-NEXT: srli a6, a5, 63 +; RV64IM-NEXT: srli a5, a5, 7 +; RV64IM-NEXT: addw a5, a5, a6 +; RV64IM-NEXT: li a6, -1003 +; RV64IM-NEXT: mulw a5, a5, a6 +; RV64IM-NEXT: subw a4, a4, a5 ; RV64IM-NEXT: sh a4, 6(a0) ; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) @@ -210,126 +206,122 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: lh s3, 8(a1) -; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s0, 12(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; 
RV32IM-LABEL: fold_srem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a2, 12(a1) ; RV32IM-NEXT: lh a3, 8(a1) ; RV32IM-NEXT: lh a4, 0(a1) ; RV32IM-NEXT: lh a1, 4(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a2, a4, a5 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a7, a2, 31 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: add a2, a2, a7 +; RV32IM-NEXT: mulh a6, a4, a5 +; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: srli a7, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub t0, a4, a2 -; RV32IM-NEXT: mulh a4, a1, a5 -; RV32IM-NEXT: add a4, a4, a1 -; RV32IM-NEXT: srli a2, a4, 31 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub a1, a1, a2 -; RV32IM-NEXT: mulh a2, a3, a5 -; RV32IM-NEXT: add a2, a2, a3 -; RV32IM-NEXT: srli a4, a2, 31 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub a2, a3, a2 -; RV32IM-NEXT: mulh a3, a6, a5 -; RV32IM-NEXT: add a3, a3, a6 -; RV32IM-NEXT: srli a4, a3, 31 -; RV32IM-NEXT: srli a3, a3, 6 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: mul a3, a3, a7 -; RV32IM-NEXT: sub a3, a6, a3 -; RV32IM-NEXT: sh a3, 6(a0) -; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulh a6, a1, a5 +; RV32IM-NEXT: add a6, a6, a1 +; RV32IM-NEXT: srli t0, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, t0 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a1, a1, a6 +; RV32IM-NEXT: mulh a6, a3, a5 +; RV32IM-NEXT: add a6, a6, a3 +; RV32IM-NEXT: srli t0, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, t0 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 
+; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: mul a5, a5, a7 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_2: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: lh s3, 16(a1) -; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s0, 24(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 
8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_srem_vec_2: @@ -337,42 +329,42 @@ ; RV64IM-NEXT: lh a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a4, 24(a1) ; RV64IM-NEXT: lh a5, 16(a1) ; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: mulh a4, a2, a3 -; RV64IM-NEXT: add a4, a4, a2 -; RV64IM-NEXT: srli a7, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a4, a4, a7 +; RV64IM-NEXT: mulh a6, a2, a3 +; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: srli a7, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, a7 ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: mulw a4, a4, a7 -; RV64IM-NEXT: subw t0, a2, a4 -; RV64IM-NEXT: mulh a4, a1, a3 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a2, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a2, a4, a2 -; RV64IM-NEXT: mulw a2, a2, a7 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: mulh a2, a5, a3 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a4, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a4 -; RV64IM-NEXT: mulw a2, a2, a7 -; RV64IM-NEXT: subw a2, a5, a2 -; RV64IM-NEXT: mulh a3, a6, a3 -; RV64IM-NEXT: add a3, a3, a6 -; RV64IM-NEXT: srli a4, 
a3, 63 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: mulh a6, a1, a3 +; RV64IM-NEXT: add a6, a6, a1 +; RV64IM-NEXT: srli t0, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, t0 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a1, a1, a6 +; RV64IM-NEXT: mulh a6, a5, a3 +; RV64IM-NEXT: add a6, a6, a5 +; RV64IM-NEXT: srli t0, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, t0 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a5, a5, a6 +; RV64IM-NEXT: mulh a3, a4, a3 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: addw a3, a3, a6 ; RV64IM-NEXT: mulw a3, a3, a7 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a5, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -394,47 +386,46 @@ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 0(a1) -; RV32I-NEXT: lh s3, 4(a1) -; RV32I-NEXT: lh s4, 8(a1) -; RV32I-NEXT: lh s1, 12(a1) +; RV32I-NEXT: lh s1, 0(a1) +; RV32I-NEXT: lh s2, 4(a1) +; RV32I-NEXT: lh s3, 8(a1) +; RV32I-NEXT: lh s4, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt ; 
RV32I-NEXT: mv s8, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __divsi3@plt -; RV32I-NEXT: mv s9, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __divsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __divsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __divsi3@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __divsi3@plt ; RV32I-NEXT: add a0, s8, a0 -; RV32I-NEXT: add a1, s7, s1 -; RV32I-NEXT: add a2, s6, s4 -; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: add a1, s7, s2 +; RV32I-NEXT: add a2, s6, s3 +; RV32I-NEXT: add a3, s5, s4 ; RV32I-NEXT: sh a3, 6(s0) ; RV32I-NEXT: sh a2, 4(s0) ; RV32I-NEXT: sh a1, 2(s0) @@ -449,127 +440,124 @@ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: combine_srem_sdiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 0(a1) +; RV32IM-NEXT: lh a2, 0(a1) ; RV32IM-NEXT: lh a3, 4(a1) ; RV32IM-NEXT: lh a4, 12(a1) ; RV32IM-NEXT: lh a1, 8(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a2, a4, a5 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a7, a2, 31 -; RV32IM-NEXT: srai a2, a2, 6 -; RV32IM-NEXT: add t0, a2, a7 +; RV32IM-NEXT: mulh a6, a4, a5 +; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: srli a7, a6, 31 +; RV32IM-NEXT: srai a6, a6, 6 +; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 -; RV32IM-NEXT: mul a2, t0, a7 -; RV32IM-NEXT: sub t1, a4, a2 -; RV32IM-NEXT: mulh a4, a1, a5 -; RV32IM-NEXT: add a4, a4, a1 -; RV32IM-NEXT: srli a2, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: mul a4, a2, a7 -; RV32IM-NEXT: sub t2, a1, a4 
-; RV32IM-NEXT: mulh a4, a3, a5 -; RV32IM-NEXT: add a4, a4, a3 -; RV32IM-NEXT: srli a1, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: mul a4, a1, a7 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: mulh a4, a6, a5 +; RV32IM-NEXT: mul t0, a6, a7 +; RV32IM-NEXT: sub a4, a4, t0 +; RV32IM-NEXT: mulh t0, a1, a5 +; RV32IM-NEXT: add t0, t0, a1 +; RV32IM-NEXT: srli t1, t0, 31 +; RV32IM-NEXT: srai t0, t0, 6 +; RV32IM-NEXT: add t0, t0, t1 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulh t1, a3, a5 +; RV32IM-NEXT: add t1, t1, a3 +; RV32IM-NEXT: srli t2, t1, 31 +; RV32IM-NEXT: srai t1, t1, 6 +; RV32IM-NEXT: add t1, t1, t2 +; RV32IM-NEXT: mul t2, t1, a7 +; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: srli t2, a5, 31 +; RV32IM-NEXT: srai a5, a5, 6 +; RV32IM-NEXT: add a5, a5, t2 +; RV32IM-NEXT: mul a7, a5, a7 +; RV32IM-NEXT: sub a2, a2, a7 +; RV32IM-NEXT: add a2, a2, a5 +; RV32IM-NEXT: add a3, a3, t1 +; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a4, a4, a6 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: mul a5, a4, a7 -; RV32IM-NEXT: sub a5, a6, a5 -; RV32IM-NEXT: add a4, a5, a4 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: add a2, t2, a2 -; RV32IM-NEXT: add a3, t1, t0 -; RV32IM-NEXT: sh a3, 6(a0) -; RV32IM-NEXT: sh a2, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 48(sp) # 
8-byte Folded Spill -; RV64I-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 0(a1) -; RV64I-NEXT: lh s3, 8(a1) -; RV64I-NEXT: lh s4, 16(a1) -; RV64I-NEXT: lh s1, 24(a1) +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s1, 0(a1) +; RV64I-NEXT: lh s2, 8(a1) +; RV64I-NEXT: lh s3, 16(a1) +; RV64I-NEXT: lh s4, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s5, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s6, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s7, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s8, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __divdi3@plt -; RV64I-NEXT: mv s9, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __divdi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __divdi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call 
__divdi3@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __divdi3@plt ; RV64I-NEXT: addw a0, s8, a0 -; RV64I-NEXT: addw a1, s7, s1 -; RV64I-NEXT: addw a2, s6, s4 -; RV64I-NEXT: addw a3, s5, s9 +; RV64I-NEXT: addw a1, s7, s2 +; RV64I-NEXT: addw a2, s6, s3 +; RV64I-NEXT: addw a3, s5, s4 ; RV64I-NEXT: sh a3, 6(s0) ; RV64I-NEXT: sh a2, 4(s0) ; RV64I-NEXT: sh a1, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) -; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: combine_srem_sdiv: @@ -577,45 +565,45 @@ ; RV64IM-NEXT: lh a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) -; RV64IM-NEXT: lh a6, 0(a1) +; RV64IM-NEXT: lh a4, 0(a1) ; RV64IM-NEXT: lh a5, 8(a1) ; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: mulh a4, a2, a3 -; RV64IM-NEXT: add a4, a4, a2 -; RV64IM-NEXT: srli a7, a4, 63 -; 
RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw t0, a4, a7 +; RV64IM-NEXT: mulh a6, a2, a3 +; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: srli a7, a6, 63 +; RV64IM-NEXT: srai a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, a7 ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: mulw a4, t0, a7 -; RV64IM-NEXT: subw t1, a2, a4 -; RV64IM-NEXT: mulh a4, a1, a3 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a2, a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a2, a4, a2 -; RV64IM-NEXT: mulw a4, a2, a7 -; RV64IM-NEXT: subw t2, a1, a4 -; RV64IM-NEXT: mulh a4, a5, a3 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: srli a1, a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a1, a4, a1 -; RV64IM-NEXT: mulw a4, a1, a7 -; RV64IM-NEXT: subw a4, a5, a4 -; RV64IM-NEXT: mulh a3, a6, a3 -; RV64IM-NEXT: add a3, a3, a6 -; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: mulw t0, a6, a7 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulh t0, a1, a3 +; RV64IM-NEXT: add t0, t0, a1 +; RV64IM-NEXT: srli t1, t0, 63 +; RV64IM-NEXT: srai t0, t0, 6 +; RV64IM-NEXT: addw t0, t0, t1 +; RV64IM-NEXT: mulw t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulh t1, a5, a3 +; RV64IM-NEXT: add t1, t1, a5 +; RV64IM-NEXT: srli t2, t1, 63 +; RV64IM-NEXT: srai t1, t1, 6 +; RV64IM-NEXT: addw t1, t1, t2 +; RV64IM-NEXT: mulw t2, t1, a7 +; RV64IM-NEXT: subw a5, a5, t2 +; RV64IM-NEXT: mulh a3, a4, a3 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli t2, a3, 63 ; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a5 -; RV64IM-NEXT: mulw a5, a3, a7 -; RV64IM-NEXT: subw a5, a6, a5 -; RV64IM-NEXT: addw a3, a5, a3 -; RV64IM-NEXT: addw a1, a4, a1 -; RV64IM-NEXT: addw a2, t2, a2 -; RV64IM-NEXT: addw a4, t1, t0 -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: addw a3, a3, t2 +; RV64IM-NEXT: mulw a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 +; RV64IM-NEXT: addw a3, a4, a3 +; RV64IM-NEXT: addw a4, a5, t1 +; RV64IM-NEXT: addw a1, a1, t0 +; RV64IM-NEXT: 
addw a2, a2, a6 +; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, @@ -642,21 +630,21 @@ ; RV32I-NEXT: srli a4, a2, 26 ; RV32I-NEXT: add a4, a2, a4 ; RV32I-NEXT: andi a4, a4, -64 -; RV32I-NEXT: sub s2, a2, a4 +; RV32I-NEXT: sub s1, a2, a4 ; RV32I-NEXT: srli a2, a1, 27 ; RV32I-NEXT: add a2, a1, a2 ; RV32I-NEXT: andi a2, a2, -32 -; RV32I-NEXT: sub s3, a1, a2 +; RV32I-NEXT: sub s2, a1, a2 ; RV32I-NEXT: srli a1, a3, 29 ; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: andi a1, a1, -8 -; RV32I-NEXT: sub s1, a3, a1 +; RV32I-NEXT: sub s3, a3, a1 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh s2, 0(s0) +; RV32I-NEXT: sh s3, 4(s0) +; RV32I-NEXT: sh s2, 2(s0) +; RV32I-NEXT: sh s1, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -677,9 +665,9 @@ ; RV32IM-NEXT: add a5, a5, a4 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 -; RV32IM-NEXT: add a6, a5, a6 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a5, a6, a5 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 ; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: srli a5, a1, 26 ; RV32IM-NEXT: add a5, a1, a5 @@ -715,21 +703,21 @@ ; RV64I-NEXT: srli a4, a2, 58 ; RV64I-NEXT: add a4, a2, a4 ; RV64I-NEXT: andi a4, a4, -64 -; RV64I-NEXT: subw s2, a2, a4 +; RV64I-NEXT: subw s1, a2, a4 ; RV64I-NEXT: srli a2, a1, 59 ; RV64I-NEXT: add a2, a1, a2 ; RV64I-NEXT: andi a2, a2, -32 -; RV64I-NEXT: subw s3, a1, a2 +; RV64I-NEXT: subw s2, a1, a2 ; RV64I-NEXT: srli a1, a3, 61 ; RV64I-NEXT: add a1, a3, a1 ; RV64I-NEXT: andi a1, a1, -8 -; RV64I-NEXT: subw s1, a3, a1 +; RV64I-NEXT: subw s3, a3, a1 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: sh a0, 6(s0) -; 
RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh s2, 0(s0) +; RV64I-NEXT: sh s3, 4(s0) +; RV64I-NEXT: sh s2, 2(s0) +; RV64I-NEXT: sh s1, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -750,9 +738,9 @@ ; RV64IM-NEXT: add a3, a3, a2 ; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a6, a3, a6 -; RV64IM-NEXT: li a3, 95 -; RV64IM-NEXT: mulw a3, a6, a3 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 58 ; RV64IM-NEXT: add a3, a1, a3 @@ -785,10 +773,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s0, 12(a1) ; RV32I-NEXT: lh s1, 8(a1) ; RV32I-NEXT: lh a2, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt @@ -799,12 +787,12 @@ ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -862,10 +850,10 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s0, 24(a1) ; RV64I-NEXT: lh s1, 16(a1) ; RV64I-NEXT: lh a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; 
RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt @@ -876,12 +864,12 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -895,7 +883,7 @@ ; RV64IM-NEXT: lh a2, 16(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a4, 24(a1) ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 @@ -904,26 +892,26 @@ ; RV64IM-NEXT: addw a3, a3, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) -; RV64IM-NEXT: li a4, 23 -; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 8 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI4_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI4_2)(a4) -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a6, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 11 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, 1 -; RV64IM-NEXT: addiw a4, a4, 1327 -; RV64IM-NEXT: mulw a3, a3, 
a4 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh zero, 0(a0) ; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a1, 2(a0) @@ -945,7 +933,7 @@ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lh a2, 4(a1) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s1, 12(a1) ; RV32I-NEXT: lh a0, 8(a1) ; RV32I-NEXT: srli a1, a2, 17 ; RV32I-NEXT: add a1, a2, a1 @@ -954,13 +942,13 @@ ; RV32I-NEXT: sub s3, a2, a1 ; RV32I-NEXT: li a1, 23 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s2, 4(s0) ; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh s3, 2(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1017,7 +1005,7 @@ ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lh a2, 8(a1) ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s1, 24(a1) ; RV64I-NEXT: lh a0, 16(a1) ; RV64I-NEXT: srli a1, a2, 49 ; RV64I-NEXT: add a1, a2, a1 @@ -1026,13 +1014,13 @@ ; RV64I-NEXT: subw s3, a2, a1 ; RV64I-NEXT: li a1, 23 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s2, 4(s0) ; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh s3, 2(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1054,13 +1042,13 @@ ; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 4 ; RV64IM-NEXT: addw a3, a3, a5 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) -; RV64IM-NEXT: ld a5, 
%lo(.LCPI5_1)(a5) -; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI5_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI5_1)(a6) +; RV64IM-NEXT: mulw a3, a3, a5 ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: mulh a3, a4, a6 ; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 11 ; RV64IM-NEXT: addw a3, a3, a5 @@ -1097,16 +1085,15 @@ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) -; RV32I-NEXT: lw s4, 16(a1) -; RV32I-NEXT: lw s5, 20(a1) -; RV32I-NEXT: lw s6, 8(a1) -; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw s0, 24(a1) +; RV32I-NEXT: lw s1, 28(a1) +; RV32I-NEXT: lw s2, 16(a1) +; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s4, 8(a1) +; RV32I-NEXT: lw s5, 12(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: li a3, 0 @@ -1114,33 +1101,33 @@ ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s6 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __moddi3@plt -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv s9, a1 -; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3@plt -; RV32I-NEXT: sw a1, 28(s0) -; RV32I-NEXT: sw a0, 24(s0) -; RV32I-NEXT: sw s1, 20(s0) -; RV32I-NEXT: sw s4, 16(s0) -; RV32I-NEXT: sw s9, 12(s0) -; RV32I-NEXT: sw s6, 8(s0) -; RV32I-NEXT: sw s8, 4(s0) -; 
RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __moddi3@plt +; RV32I-NEXT: sw a1, 28(s6) +; RV32I-NEXT: sw a0, 24(s6) +; RV32I-NEXT: sw s3, 20(s6) +; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s5, 12(s6) +; RV32I-NEXT: sw s4, 8(s6) +; RV32I-NEXT: sw s8, 4(s6) +; RV32I-NEXT: sw s7, 0(s6) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1151,7 +1138,6 @@ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; @@ -1168,16 +1154,15 @@ ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) -; RV32IM-NEXT: lw s4, 16(a1) -; RV32IM-NEXT: lw s5, 20(a1) -; RV32IM-NEXT: lw s6, 8(a1) -; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw s0, 24(a1) +; RV32IM-NEXT: lw s1, 28(a1) +; RV32IM-NEXT: lw s2, 16(a1) +; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s4, 8(a1) +; RV32IM-NEXT: lw s5, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) -; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: li a3, 0 @@ -1185,33 +1170,33 @@ ; RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s6 -; RV32IM-NEXT: mv a1, s1 -; RV32IM-NEXT: li a3, 0 -; RV32IM-NEXT: call __moddi3@plt -; RV32IM-NEXT: mv s6, a0 -; RV32IM-NEXT: mv s9, a1 -; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s4 ; RV32IM-NEXT: mv a1, 
s5 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3@plt ; RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s1, a1 -; RV32IM-NEXT: lui a0, 1 -; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s2 ; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3@plt -; RV32IM-NEXT: sw a1, 28(s0) -; RV32IM-NEXT: sw a0, 24(s0) -; RV32IM-NEXT: sw s1, 20(s0) -; RV32IM-NEXT: sw s4, 16(s0) -; RV32IM-NEXT: sw s9, 12(s0) -; RV32IM-NEXT: sw s6, 8(s0) -; RV32IM-NEXT: sw s8, 4(s0) -; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: mv s2, a0 +; RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: li a3, 0 +; RV32IM-NEXT: call __moddi3@plt +; RV32IM-NEXT: sw a1, 28(s6) +; RV32IM-NEXT: sw a0, 24(s6) +; RV32IM-NEXT: sw s3, 20(s6) +; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s5, 12(s6) +; RV32IM-NEXT: sw s4, 8(s6) +; RV32IM-NEXT: sw s8, 4(s6) +; RV32IM-NEXT: sw s7, 0(s6) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1222,7 +1207,6 @@ ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 48 ; RV32IM-NEXT: ret ; @@ -1234,10 +1218,10 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld s0, 24(a1) ; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt @@ -1248,12 +1232,12 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; 
RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sd a0, 24(s0) -; RV64I-NEXT: sd s1, 16(s0) -; RV64I-NEXT: sd s3, 8(s0) -; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd s1, 16(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1267,7 +1251,7 @@ ; RV64IM-NEXT: ld a2, 16(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) -; RV64IM-NEXT: ld a6, 24(a1) +; RV64IM-NEXT: ld a4, 24(a1) ; RV64IM-NEXT: ld a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 @@ -1276,26 +1260,26 @@ ; RV64IM-NEXT: add a3, a3, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) -; RV64IM-NEXT: li a4, 23 -; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mul a3, a3, a6 ; RV64IM-NEXT: sub a2, a2, a3 ; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srai a3, a3, 8 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI6_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI6_2)(a4) -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mul a3, a3, a6 ; RV64IM-NEXT: sub a1, a1, a3 -; RV64IM-NEXT: mulh a3, a6, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srai a3, a3, 11 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: lui a4, 1 -; RV64IM-NEXT: addiw a4, a4, 1327 -; RV64IM-NEXT: mul a3, a3, a4 -; RV64IM-NEXT: sub a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a3, a4, a3 ; RV64IM-NEXT: sd zero, 
0(a0) ; RV64IM-NEXT: sd a3, 24(a0) ; RV64IM-NEXT: sd a1, 8(a0) diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -156,16 +156,16 @@ ; RV32IZbbZbt-NEXT: sltu a4, a0, a2 ; RV32IZbbZbt-NEXT: sub a5, a1, a3 ; RV32IZbbZbt-NEXT: sub a4, a5, a4 -; RV32IZbbZbt-NEXT: srai a6, a4, 31 -; RV32IZbbZbt-NEXT: lui a5, 524288 -; RV32IZbbZbt-NEXT: xor a7, a6, a5 -; RV32IZbbZbt-NEXT: xor a5, a1, a4 +; RV32IZbbZbt-NEXT: srai a5, a4, 31 +; RV32IZbbZbt-NEXT: lui a6, 524288 +; RV32IZbbZbt-NEXT: xor a6, a5, a6 +; RV32IZbbZbt-NEXT: xor a7, a1, a4 ; RV32IZbbZbt-NEXT: xor a1, a1, a3 -; RV32IZbbZbt-NEXT: and a1, a1, a5 +; RV32IZbbZbt-NEXT: and a1, a1, a7 ; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a4 +; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a4 ; RV32IZbbZbt-NEXT: sub a0, a0, a2 -; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0 +; RV32IZbbZbt-NEXT: cmov a0, a3, a5, a0 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func2: diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -164,16 +164,16 @@ ; RV32IZbbZbt-NEXT: sltu a2, a0, a4 ; RV32IZbbZbt-NEXT: sub a3, a1, a5 ; RV32IZbbZbt-NEXT: sub a2, a3, a2 -; RV32IZbbZbt-NEXT: srai a6, a2, 31 -; RV32IZbbZbt-NEXT: lui a3, 524288 -; RV32IZbbZbt-NEXT: xor a7, a6, a3 -; RV32IZbbZbt-NEXT: xor a3, a1, a2 +; RV32IZbbZbt-NEXT: srai a3, a2, 31 +; RV32IZbbZbt-NEXT: lui a6, 524288 +; RV32IZbbZbt-NEXT: xor a6, a3, a6 +; RV32IZbbZbt-NEXT: xor a7, a1, a2 ; RV32IZbbZbt-NEXT: xor a1, a1, a5 -; RV32IZbbZbt-NEXT: and a1, a1, a3 -; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a2 +; RV32IZbbZbt-NEXT: and a1, a1, a7 +; RV32IZbbZbt-NEXT: slti a5, a1, 0 +; RV32IZbbZbt-NEXT: cmov a1, a5, a6, a2 ; RV32IZbbZbt-NEXT: sub a0, a0, a4 -; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0 +; 
RV32IZbbZbt-NEXT: cmov a0, a5, a3, a0 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func64: diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll --- a/llvm/test/CodeGen/RISCV/stack-store-check.ll +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -32,12 +32,12 @@ ; CHECK-NEXT: lw s6, %lo(U)(a0) ; CHECK-NEXT: lw s7, %lo(U+4)(a0) ; CHECK-NEXT: lw s8, %lo(U+8)(a0) -; CHECK-NEXT: lw s2, %lo(U+12)(a0) +; CHECK-NEXT: lw s0, %lo(U+12)(a0) ; CHECK-NEXT: sw zero, 612(sp) ; CHECK-NEXT: sw zero, 608(sp) ; CHECK-NEXT: sw zero, 604(sp) ; CHECK-NEXT: sw zero, 600(sp) -; CHECK-NEXT: sw s2, 596(sp) +; CHECK-NEXT: sw s0, 596(sp) ; CHECK-NEXT: sw s8, 592(sp) ; CHECK-NEXT: sw s7, 588(sp) ; CHECK-NEXT: addi a0, sp, 616 @@ -45,21 +45,21 @@ ; CHECK-NEXT: addi a2, sp, 584 ; CHECK-NEXT: sw s6, 584(sp) ; CHECK-NEXT: call __subtf3@plt -; CHECK-NEXT: lw s4, 616(sp) -; CHECK-NEXT: lw s5, 620(sp) +; CHECK-NEXT: lw s9, 616(sp) +; CHECK-NEXT: lw s2, 620(sp) ; CHECK-NEXT: lw s3, 624(sp) -; CHECK-NEXT: lw s11, 628(sp) -; CHECK-NEXT: sw s2, 548(sp) +; CHECK-NEXT: lw s4, 628(sp) +; CHECK-NEXT: sw s0, 548(sp) ; CHECK-NEXT: sw s8, 544(sp) ; CHECK-NEXT: sw s7, 540(sp) ; CHECK-NEXT: sw s6, 536(sp) -; CHECK-NEXT: sw s11, 564(sp) +; CHECK-NEXT: sw s4, 564(sp) ; CHECK-NEXT: sw s3, 560(sp) -; CHECK-NEXT: sw s5, 556(sp) +; CHECK-NEXT: sw s2, 556(sp) ; CHECK-NEXT: addi a0, sp, 568 ; CHECK-NEXT: addi a1, sp, 552 ; CHECK-NEXT: addi a2, sp, 536 -; CHECK-NEXT: sw s4, 552(sp) +; CHECK-NEXT: sw s9, 552(sp) ; CHECK-NEXT: call __subtf3@plt ; CHECK-NEXT: lw a0, 568(sp) ; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill @@ -73,7 +73,7 @@ ; CHECK-NEXT: sw zero, 496(sp) ; CHECK-NEXT: sw zero, 492(sp) ; CHECK-NEXT: sw zero, 488(sp) -; CHECK-NEXT: sw s2, 516(sp) +; CHECK-NEXT: sw s0, 516(sp) ; CHECK-NEXT: sw s8, 512(sp) ; CHECK-NEXT: sw s7, 508(sp) ; CHECK-NEXT: addi a0, sp, 520 @@ -81,10 +81,10 @@ ; CHECK-NEXT: addi a2, sp, 488 ; CHECK-NEXT: sw s6, 504(sp) ; 
CHECK-NEXT: call __addtf3@plt -; CHECK-NEXT: lw s9, 520(sp) +; CHECK-NEXT: lw s11, 520(sp) ; CHECK-NEXT: lw s10, 524(sp) -; CHECK-NEXT: lw s0, 528(sp) -; CHECK-NEXT: sw s0, 20(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s5, 528(sp) +; CHECK-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw s1, 532(sp) ; CHECK-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(Y1) @@ -100,13 +100,13 @@ ; CHECK-NEXT: sw a3, 304(sp) ; CHECK-NEXT: sw a2, 300(sp) ; CHECK-NEXT: sw a1, 296(sp) -; CHECK-NEXT: sw s11, 324(sp) +; CHECK-NEXT: sw s4, 324(sp) ; CHECK-NEXT: sw s3, 320(sp) -; CHECK-NEXT: sw s5, 316(sp) +; CHECK-NEXT: sw s2, 316(sp) ; CHECK-NEXT: addi a0, sp, 328 ; CHECK-NEXT: addi a1, sp, 312 ; CHECK-NEXT: addi a2, sp, 296 -; CHECK-NEXT: sw s4, 312(sp) +; CHECK-NEXT: sw s9, 312(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw a0, 328(sp) ; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill @@ -114,18 +114,18 @@ ; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a0, 336(sp) ; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw s4, 340(sp) -; CHECK-NEXT: sw s2, 468(sp) +; CHECK-NEXT: lw s9, 340(sp) +; CHECK-NEXT: sw s0, 468(sp) ; CHECK-NEXT: sw s8, 464(sp) ; CHECK-NEXT: sw s7, 460(sp) ; CHECK-NEXT: sw s6, 456(sp) ; CHECK-NEXT: sw s1, 452(sp) -; CHECK-NEXT: sw s0, 448(sp) +; CHECK-NEXT: sw s5, 448(sp) ; CHECK-NEXT: sw s10, 444(sp) ; CHECK-NEXT: addi a0, sp, 472 ; CHECK-NEXT: addi a1, sp, 456 ; CHECK-NEXT: addi a2, sp, 440 -; CHECK-NEXT: sw s9, 440(sp) +; CHECK-NEXT: sw s11, 440(sp) ; CHECK-NEXT: call __addtf3@plt ; CHECK-NEXT: lw a3, 472(sp) ; CHECK-NEXT: lw a0, 476(sp) @@ -152,31 +152,31 @@ ; CHECK-NEXT: sw a2, %lo(X+8)(a4) ; CHECK-NEXT: sw a3, %lo(X+4)(a4) ; CHECK-NEXT: sw a0, %lo(X)(a4) -; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s8, 212(sp) -; CHECK-NEXT: lw s7, 8(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s7, 208(sp) -; CHECK-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s11, 
204(sp) +; CHECK-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s4, 212(sp) +; CHECK-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s3, 208(sp) +; CHECK-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s2, 204(sp) ; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 200(sp) ; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 228(sp) -; CHECK-NEXT: lw s3, 24(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s3, 224(sp) -; CHECK-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s2, 220(sp) +; CHECK-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s1, 224(sp) +; CHECK-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s0, 220(sp) ; CHECK-NEXT: addi a0, sp, 232 ; CHECK-NEXT: addi a1, sp, 216 ; CHECK-NEXT: addi a2, sp, 200 -; CHECK-NEXT: lw s1, 40(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s1, 216(sp) +; CHECK-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s8, 216(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw s5, 232(sp) ; CHECK-NEXT: lw a0, 236(sp) ; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw s6, 240(sp) -; CHECK-NEXT: lw s0, 244(sp) +; CHECK-NEXT: lw s7, 244(sp) ; CHECK-NEXT: sw zero, 356(sp) ; CHECK-NEXT: sw zero, 352(sp) ; CHECK-NEXT: sw zero, 348(sp) @@ -189,7 +189,7 @@ ; CHECK-NEXT: addi a0, sp, 376 ; CHECK-NEXT: addi a1, sp, 360 ; CHECK-NEXT: addi a2, sp, 344 -; CHECK-NEXT: sw s9, 360(sp) +; CHECK-NEXT: sw s11, 360(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw a0, 376(sp) ; CHECK-NEXT: lw a1, 388(sp) @@ -202,10 +202,10 @@ ; CHECK-NEXT: sw a0, %lo(S)(a4) ; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 260(sp) -; CHECK-NEXT: sw s3, 256(sp) -; CHECK-NEXT: sw s2, 252(sp) -; CHECK-NEXT: sw s1, 248(sp) -; CHECK-NEXT: sw s4, 276(sp) +; CHECK-NEXT: sw s1, 256(sp) +; CHECK-NEXT: sw s0, 252(sp) +; CHECK-NEXT: sw s8, 248(sp) +; CHECK-NEXT: sw s9, 276(sp) ; CHECK-NEXT: lw a0, 28(sp) # 4-byte Folded 
Reload ; CHECK-NEXT: sw a0, 272(sp) ; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload @@ -229,7 +229,7 @@ ; CHECK-NEXT: sw zero, 160(sp) ; CHECK-NEXT: sw zero, 156(sp) ; CHECK-NEXT: sw zero, 152(sp) -; CHECK-NEXT: sw s0, 180(sp) +; CHECK-NEXT: sw s7, 180(sp) ; CHECK-NEXT: sw s6, 176(sp) ; CHECK-NEXT: lw a0, 0(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 172(sp) @@ -251,9 +251,9 @@ ; CHECK-NEXT: sw zero, 112(sp) ; CHECK-NEXT: sw zero, 108(sp) ; CHECK-NEXT: sw zero, 104(sp) -; CHECK-NEXT: sw s8, 132(sp) -; CHECK-NEXT: sw s7, 128(sp) -; CHECK-NEXT: sw s11, 124(sp) +; CHECK-NEXT: sw s4, 132(sp) +; CHECK-NEXT: sw s3, 128(sp) +; CHECK-NEXT: sw s2, 124(sp) ; CHECK-NEXT: addi a0, sp, 136 ; CHECK-NEXT: addi a1, sp, 120 ; CHECK-NEXT: addi a2, sp, 104 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -45,12 +45,12 @@ ; CHECK-NOT: tail callee_indirect2 ; CHECK: lui a0, %hi(callee_indirect2) -; CHECK-NEXT: addi a5, a0, %lo(callee_indirect2) -; CHECK-NEXT: jr a5 +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) +; CHECK-NEXT: jr t1 ; CHECK: lui a0, %hi(callee_indirect1) -; CHECK-NEXT: addi a5, a0, %lo(callee_indirect1) -; CHECK-NEXT: jr a5 +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) +; CHECK-NEXT: jr t1 entry: %tobool = icmp eq i32 %a, 0 %callee = select i1 %tobool, void ()* @callee_indirect1, void ()* @callee_indirect2 diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -10,103 +10,99 @@ ; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill -; RISCV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RISCV32-NEXT: sw s6, 4(sp) # 
4-byte Folded Spill -; RISCV32-NEXT: lw a6, 12(a1) +; RISCV32-NEXT: lw a3, 12(a1) ; RISCV32-NEXT: lw a7, 12(a2) -; RISCV32-NEXT: lw t3, 8(a1) +; RISCV32-NEXT: lw a6, 8(a1) ; RISCV32-NEXT: lw a4, 0(a2) ; RISCV32-NEXT: lw a5, 0(a1) -; RISCV32-NEXT: lw a3, 4(a1) -; RISCV32-NEXT: lw s2, 8(a2) +; RISCV32-NEXT: lw t3, 4(a1) +; RISCV32-NEXT: lw t0, 8(a2) ; RISCV32-NEXT: lw a2, 4(a2) ; RISCV32-NEXT: mulhu a1, a5, a4 -; RISCV32-NEXT: mul s1, a3, a4 -; RISCV32-NEXT: add a1, s1, a1 -; RISCV32-NEXT: sltu s1, a1, s1 -; RISCV32-NEXT: mulhu s0, a3, a4 -; RISCV32-NEXT: add t4, s0, s1 -; RISCV32-NEXT: mul s0, a5, a2 -; RISCV32-NEXT: add t0, s0, a1 -; RISCV32-NEXT: sltu a1, t0, s0 -; RISCV32-NEXT: mulhu s0, a5, a2 -; RISCV32-NEXT: add a1, s0, a1 -; RISCV32-NEXT: add a1, t4, a1 -; RISCV32-NEXT: mul s0, a3, a2 -; RISCV32-NEXT: add s1, s0, a1 -; RISCV32-NEXT: mul t1, s2, a5 -; RISCV32-NEXT: mul s3, t3, a4 +; RISCV32-NEXT: mul t1, t3, a4 +; RISCV32-NEXT: add a1, t1, a1 +; RISCV32-NEXT: sltu t1, a1, t1 +; RISCV32-NEXT: mulhu t2, t3, a4 +; RISCV32-NEXT: add t4, t2, t1 +; RISCV32-NEXT: mul t1, a5, a2 +; RISCV32-NEXT: add a1, t1, a1 +; RISCV32-NEXT: sltu t1, a1, t1 +; RISCV32-NEXT: mulhu t2, a5, a2 +; RISCV32-NEXT: add t1, t2, t1 +; RISCV32-NEXT: add t5, t4, t1 +; RISCV32-NEXT: mul t6, t3, a2 +; RISCV32-NEXT: add s0, t6, t5 +; RISCV32-NEXT: mul t1, t0, a5 +; RISCV32-NEXT: mul s3, a6, a4 ; RISCV32-NEXT: add s4, s3, t1 -; RISCV32-NEXT: add t1, s1, s4 -; RISCV32-NEXT: sltu t2, t1, s1 -; RISCV32-NEXT: sltu s1, s1, s0 -; RISCV32-NEXT: sltu a1, a1, t4 -; RISCV32-NEXT: mulhu s0, a3, a2 -; RISCV32-NEXT: add a1, s0, a1 -; RISCV32-NEXT: add s0, a1, s1 -; RISCV32-NEXT: mul a1, a3, s2 -; RISCV32-NEXT: mul s1, a7, a5 -; RISCV32-NEXT: add a1, s1, a1 -; RISCV32-NEXT: mulhu s5, s2, a5 -; RISCV32-NEXT: add s6, s5, a1 -; RISCV32-NEXT: mul s1, a2, t3 -; RISCV32-NEXT: mul a1, a6, a4 -; RISCV32-NEXT: add a1, a1, s1 -; RISCV32-NEXT: mulhu t5, t3, a4 -; RISCV32-NEXT: add t6, t5, a1 -; RISCV32-NEXT: add a1, t6, s6 
-; RISCV32-NEXT: sltu s1, s4, s3 -; RISCV32-NEXT: add a1, a1, s1 -; RISCV32-NEXT: add a1, s0, a1 -; RISCV32-NEXT: add t4, a1, t2 +; RISCV32-NEXT: add t1, s0, s4 +; RISCV32-NEXT: sltu t2, t1, s0 +; RISCV32-NEXT: sltu t6, s0, t6 +; RISCV32-NEXT: sltu t4, t5, t4 +; RISCV32-NEXT: mulhu t5, t3, a2 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: add s0, t4, t6 +; RISCV32-NEXT: mul t4, t3, t0 +; RISCV32-NEXT: mul t5, a7, a5 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: mulhu s1, t0, a5 +; RISCV32-NEXT: add s2, s1, t4 +; RISCV32-NEXT: mul t4, a2, a6 +; RISCV32-NEXT: mul t5, a3, a4 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: mulhu t5, a6, a4 +; RISCV32-NEXT: add t6, t5, t4 +; RISCV32-NEXT: add t4, t6, s2 +; RISCV32-NEXT: sltu s3, s4, s3 +; RISCV32-NEXT: add t4, t4, s3 +; RISCV32-NEXT: add t4, s0, t4 +; RISCV32-NEXT: add t4, t4, t2 ; RISCV32-NEXT: beq t4, s0, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start ; RISCV32-NEXT: sltu t2, t4, s0 ; RISCV32-NEXT: .LBB0_2: # %start -; RISCV32-NEXT: sltu a1, s6, s5 +; RISCV32-NEXT: sltu s0, s2, s1 +; RISCV32-NEXT: snez s1, t3 +; RISCV32-NEXT: snez s2, a7 +; RISCV32-NEXT: and s1, s2, s1 +; RISCV32-NEXT: mulhu s2, a7, a5 +; RISCV32-NEXT: snez s2, s2 +; RISCV32-NEXT: or s1, s1, s2 +; RISCV32-NEXT: mulhu t3, t3, t0 +; RISCV32-NEXT: snez t3, t3 +; RISCV32-NEXT: or t3, s1, t3 +; RISCV32-NEXT: or t3, t3, s0 +; RISCV32-NEXT: sltu t5, t6, t5 +; RISCV32-NEXT: snez t6, a2 ; RISCV32-NEXT: snez s0, a3 -; RISCV32-NEXT: snez s1, a7 -; RISCV32-NEXT: and s0, s1, s0 -; RISCV32-NEXT: mulhu s1, a7, a5 -; RISCV32-NEXT: snez s1, s1 -; RISCV32-NEXT: or s0, s0, s1 -; RISCV32-NEXT: mulhu a3, a3, s2 -; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or a3, s0, a3 -; RISCV32-NEXT: or a1, a3, a1 -; RISCV32-NEXT: sltu a3, t6, t5 -; RISCV32-NEXT: snez s1, a2 -; RISCV32-NEXT: snez s0, a6 -; RISCV32-NEXT: and s1, s0, s1 -; RISCV32-NEXT: mulhu s0, a6, a4 +; RISCV32-NEXT: and t6, s0, t6 +; RISCV32-NEXT: mulhu s0, a3, a4 ; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or s1, 
s1, s0 -; RISCV32-NEXT: mulhu a2, a2, t3 +; RISCV32-NEXT: or t6, t6, s0 +; RISCV32-NEXT: mulhu a2, a2, a6 ; RISCV32-NEXT: snez a2, a2 -; RISCV32-NEXT: or a2, s1, a2 -; RISCV32-NEXT: or a2, a2, a3 -; RISCV32-NEXT: or a3, s2, a7 +; RISCV32-NEXT: or a2, t6, a2 +; RISCV32-NEXT: or a2, a2, t5 +; RISCV32-NEXT: or a7, t0, a7 +; RISCV32-NEXT: snez a7, a7 +; RISCV32-NEXT: or a3, a6, a3 ; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or s1, t3, a6 -; RISCV32-NEXT: snez s1, s1 -; RISCV32-NEXT: and a3, s1, a3 +; RISCV32-NEXT: and a3, a3, a7 ; RISCV32-NEXT: or a2, a3, a2 -; RISCV32-NEXT: or a1, a2, a1 -; RISCV32-NEXT: or a1, a1, t2 -; RISCV32-NEXT: mul a2, a5, a4 -; RISCV32-NEXT: andi a1, a1, 1 -; RISCV32-NEXT: sw a2, 0(a0) -; RISCV32-NEXT: sw t0, 4(a0) +; RISCV32-NEXT: or a2, a2, t3 +; RISCV32-NEXT: or a2, a2, t2 +; RISCV32-NEXT: mul a3, a5, a4 +; RISCV32-NEXT: andi a2, a2, 1 +; RISCV32-NEXT: sw a3, 0(a0) +; RISCV32-NEXT: sw a1, 4(a0) ; RISCV32-NEXT: sw t1, 8(a0) ; RISCV32-NEXT: sw t4, 12(a0) -; RISCV32-NEXT: sb a1, 16(a0) +; RISCV32-NEXT: sb a2, 16(a0) ; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RISCV32-NEXT: addi sp, sp, 32 ; RISCV32-NEXT: ret start: diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -394,8 +394,8 @@ ; RV64-NEXT: lwu a1, 0(s0) ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli s2, a0, 11 -; RV64-NEXT: srli s1, a0, 22 +; RV64-NEXT: srli s1, a0, 11 +; RV64-NEXT: srli s2, a0, 22 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 683 ; 
RV64-NEXT: call __muldi3@plt @@ -407,14 +407,14 @@ ; RV64-NEXT: li a1, 341 ; RV64-NEXT: sltu s3, a1, a0 ; RV64-NEXT: li a1, 819 -; RV64-NEXT: mv a0, s1 +; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt ; RV64-NEXT: addiw a0, a0, -1638 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 1 -; RV64-NEXT: sltu s1, a1, a0 +; RV64-NEXT: sltu s2, a1, a0 ; RV64-NEXT: li a1, 1463 -; RV64-NEXT: mv a0, s2 +; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __muldi3@plt ; RV64-NEXT: addiw a0, a0, -1463 ; RV64-NEXT: andi a0, a0, 2047 @@ -426,7 +426,7 @@ ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: slli a0, a0, 11 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a1, s1, 22 +; RV64-NEXT: slli a1, s2, 22 ; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: sw a0, 0(s0) ; RV64-NEXT: slli a0, a0, 31 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -19,127 +19,123 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: lhu s3, 8(a1) -; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s0, 12(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 124 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 98 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 1003 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 
6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_urem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 12(a1) +; RV32IM-NEXT: lhu a2, 12(a1) ; RV32IM-NEXT: lhu a3, 8(a1) ; RV32IM-NEXT: lhu a4, 0(a1) ; RV32IM-NEXT: lhu a1, 4(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 ; RV32IM-NEXT: mulhu a5, a4, a5 -; RV32IM-NEXT: sub a2, a4, a5 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 -; RV32IM-NEXT: srli a4, a1, 2 -; RV32IM-NEXT: lui a5, 135300 -; RV32IM-NEXT: addi a5, a5, 529 -; RV32IM-NEXT: mulhu a4, a4, a5 -; RV32IM-NEXT: srli a4, a4, 2 -; RV32IM-NEXT: li a5, 124 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 -; RV32IM-NEXT: lui a4, 342392 -; RV32IM-NEXT: addi a4, a4, 669 -; RV32IM-NEXT: mulhu a4, a3, a4 -; RV32IM-NEXT: srli a4, a4, 5 -; RV32IM-NEXT: li a5, 98 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: lui a4, 267633 -; RV32IM-NEXT: addi a4, a4, -1809 -; RV32IM-NEXT: mulhu a4, a6, a4 -; RV32IM-NEXT: srli a4, a4, 8 -; RV32IM-NEXT: li a5, 1003 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a4, a6, a4 -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sub a6, a4, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul 
a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: srli a5, a1, 2 +; RV32IM-NEXT: lui a6, 135300 +; RV32IM-NEXT: addi a6, a6, 529 +; RV32IM-NEXT: mulhu a5, a5, a6 +; RV32IM-NEXT: srli a5, a5, 2 +; RV32IM-NEXT: li a6, 124 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: lui a5, 342392 +; RV32IM-NEXT: addi a5, a5, 669 +; RV32IM-NEXT: mulhu a5, a3, a5 +; RV32IM-NEXT: srli a5, a5, 5 +; RV32IM-NEXT: li a6, 98 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: lui a5, 267633 +; RV32IM-NEXT: addi a5, a5, -1809 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: srli a5, a5, 8 +; RV32IM-NEXT: li a6, 1003 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_1: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: lhu s3, 16(a1) -; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s0, 24(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call 
__umoddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 124 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 98 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 1003 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_urem_vec_1: @@ -147,44 +143,44 @@ ; RV64IM-NEXT: lhu a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lhu a6, 24(a1) +; RV64IM-NEXT: lhu a4, 24(a1) ; RV64IM-NEXT: lhu a5, 16(a1) ; RV64IM-NEXT: lhu a1, 8(a1) ; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: sub a4, a2, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: li a7, 95 -; 
RV64IM-NEXT: lui a4, %hi(.LCPI0_1) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) -; RV64IM-NEXT: mulw a3, a3, a7 -; RV64IM-NEXT: subw t0, a2, a3 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: lui a7, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a7, %lo(.LCPI0_1)(a7) +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 2 -; RV64IM-NEXT: mulhu a3, a3, a4 +; RV64IM-NEXT: mulhu a3, a3, a7 ; RV64IM-NEXT: srli a3, a3, 3 -; RV64IM-NEXT: li a7, 124 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) -; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: li a6, 124 +; RV64IM-NEXT: lui a7, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a7, %lo(.LCPI0_2)(a7) +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a1, a1, a3 ; RV64IM-NEXT: srli a3, a5, 1 -; RV64IM-NEXT: mulhu a3, a3, a4 +; RV64IM-NEXT: mulhu a3, a3, a7 ; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) -; RV64IM-NEXT: li a2, 98 -; RV64IM-NEXT: mulw a2, a3, a2 -; RV64IM-NEXT: subw a2, a5, a2 -; RV64IM-NEXT: mulhu a3, a6, a4 -; RV64IM-NEXT: srli a3, a3, 7 -; RV64IM-NEXT: li a4, 1003 -; RV64IM-NEXT: mulw a3, a3, a4 -; RV64IM-NEXT: subw a3, a6, a3 -; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) +; RV64IM-NEXT: li a7, 98 +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: mulhu a5, a4, a6 +; RV64IM-NEXT: srli a5, a5, 7 +; RV64IM-NEXT: li a6, 1003 +; RV64IM-NEXT: mulw a5, a5, a6 +; RV64IM-NEXT: subw a4, a4, a5 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -200,126 +196,122 @@ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded 
Spill -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: lhu s3, 8(a1) -; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s0, 12(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_urem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 12(a1) -; RV32IM-NEXT: lhu a7, 8(a1) +; RV32IM-NEXT: lhu a2, 12(a1) +; RV32IM-NEXT: lhu a3, 8(a1) ; RV32IM-NEXT: lhu a4, 0(a1) ; RV32IM-NEXT: lhu a1, 4(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: sub a3, a4, a2 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a3, 95 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub t0, a4, a2 -; RV32IM-NEXT: mulhu a4, a1, a5 -; 
RV32IM-NEXT: sub a2, a1, a4 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub a1, a1, a2 -; RV32IM-NEXT: mulhu a2, a7, a5 -; RV32IM-NEXT: sub a4, a7, a2 -; RV32IM-NEXT: srli a4, a4, 1 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub a2, a7, a2 -; RV32IM-NEXT: mulhu a4, a6, a5 -; RV32IM-NEXT: sub a5, a6, a4 -; RV32IM-NEXT: srli a5, a5, 1 -; RV32IM-NEXT: add a4, a5, a4 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: mul a3, a4, a3 -; RV32IM-NEXT: sub a3, a6, a3 -; RV32IM-NEXT: sh a3, 6(a0) -; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: sub a7, a4, a6 +; RV32IM-NEXT: srli a7, a7, 1 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: li a7, 95 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulhu a6, a1, a5 +; RV32IM-NEXT: sub t0, a1, a6 +; RV32IM-NEXT: srli t0, t0, 1 +; RV32IM-NEXT: add a6, t0, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a1, a1, a6 +; RV32IM-NEXT: mulhu a6, a3, a5 +; RV32IM-NEXT: sub t0, a3, a6 +; RV32IM-NEXT: srli t0, t0, 1 +; RV32IM-NEXT: add a6, t0, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: sub a6, a2, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: mul a5, a5, a7 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_2: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd 
s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: lhu s3, 16(a1) -; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s0, 24(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 
32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_urem_vec_2: @@ -327,42 +319,42 @@ ; RV64IM-NEXT: lhu a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lhu a6, 24(a1) -; RV64IM-NEXT: lhu a7, 16(a1) +; RV64IM-NEXT: lhu a4, 24(a1) +; RV64IM-NEXT: lhu a5, 16(a1) ; RV64IM-NEXT: lhu a1, 8(a1) -; RV64IM-NEXT: mulhu a4, a2, a3 -; RV64IM-NEXT: sub a5, a2, a4 -; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw t0, a2, a4 -; RV64IM-NEXT: mulhu a4, a1, a3 -; RV64IM-NEXT: sub a2, a1, a4 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a4 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: mulhu a2, a7, a3 -; RV64IM-NEXT: sub a4, a7, a2 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a2, a4, a2 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a7, a2 -; RV64IM-NEXT: mulhu a3, a6, a3 -; RV64IM-NEXT: sub a4, a6, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: mulhu a6, a2, a3 +; RV64IM-NEXT: sub a7, a2, a6 +; RV64IM-NEXT: srli a7, a7, 1 +; RV64IM-NEXT: add a6, a7, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: mulhu a6, a1, a3 +; RV64IM-NEXT: sub t0, a1, a6 +; RV64IM-NEXT: srli t0, t0, 1 +; RV64IM-NEXT: add a6, t0, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a1, a1, a6 +; RV64IM-NEXT: mulhu a6, a5, a3 +; RV64IM-NEXT: sub t0, a5, a6 +; RV64IM-NEXT: srli t0, t0, 1 +; RV64IM-NEXT: add 
a6, t0, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a5, a5, a6 +; RV64IM-NEXT: mulhu a3, a4, a3 +; RV64IM-NEXT: sub a6, a4, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: mulw a3, a3, a5 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a5, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -384,47 +376,46 @@ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 0(a1) -; RV32I-NEXT: lhu s3, 4(a1) -; RV32I-NEXT: lhu s4, 8(a1) -; RV32I-NEXT: lhu s1, 12(a1) +; RV32I-NEXT: lhu s1, 0(a1) +; RV32I-NEXT: lhu s2, 4(a1) +; RV32I-NEXT: lhu s3, 8(a1) +; RV32I-NEXT: lhu s4, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s8, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __udivsi3@plt -; RV32I-NEXT: mv s9, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __udivsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __udivsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; 
RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __udivsi3@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __udivsi3@plt ; RV32I-NEXT: add a0, s8, a0 -; RV32I-NEXT: add a1, s7, s1 -; RV32I-NEXT: add a2, s6, s4 -; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: add a1, s7, s2 +; RV32I-NEXT: add a2, s6, s3 +; RV32I-NEXT: add a3, s5, s4 ; RV32I-NEXT: sh a3, 6(s0) ; RV32I-NEXT: sh a2, 4(s0) ; RV32I-NEXT: sh a1, 2(s0) @@ -439,127 +430,124 @@ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: combine_urem_udiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 0(a1) -; RV32IM-NEXT: lhu a7, 4(a1) +; RV32IM-NEXT: lhu a2, 0(a1) +; RV32IM-NEXT: lhu a3, 4(a1) ; RV32IM-NEXT: lhu a4, 12(a1) ; RV32IM-NEXT: lhu a1, 8(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: sub a3, a4, a2 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: srli t3, a2, 6 -; RV32IM-NEXT: li t0, 95 -; RV32IM-NEXT: mul a3, t3, t0 -; RV32IM-NEXT: sub t1, a4, a3 -; RV32IM-NEXT: mulhu a4, a1, a5 -; RV32IM-NEXT: sub a3, a1, a4 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: srli a3, a3, 6 -; RV32IM-NEXT: mul a4, a3, t0 -; RV32IM-NEXT: sub t2, a1, a4 -; RV32IM-NEXT: mulhu a4, a7, a5 -; RV32IM-NEXT: sub a1, a7, a4 -; RV32IM-NEXT: srli a1, a1, 1 -; RV32IM-NEXT: add a1, a1, a4 -; RV32IM-NEXT: srli a1, a1, 6 -; RV32IM-NEXT: mul a4, a1, t0 -; RV32IM-NEXT: sub a4, a7, a4 -; RV32IM-NEXT: mulhu a5, a6, a5 -; RV32IM-NEXT: sub a2, a6, a5 -; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: sub a7, a4, a6 +; RV32IM-NEXT: srli a7, a7, 1 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: li a7, 95 +; 
RV32IM-NEXT: mul t0, a6, a7 +; RV32IM-NEXT: sub a4, a4, t0 +; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: sub t1, a1, t0 +; RV32IM-NEXT: srli t1, t1, 1 +; RV32IM-NEXT: add t0, t1, t0 +; RV32IM-NEXT: srli t0, t0, 6 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulhu t1, a3, a5 +; RV32IM-NEXT: sub t2, a3, t1 +; RV32IM-NEXT: srli t2, t2, 1 +; RV32IM-NEXT: add t1, t2, t1 +; RV32IM-NEXT: srli t1, t1, 6 +; RV32IM-NEXT: mul t2, t1, a7 +; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: sub t2, a2, a5 +; RV32IM-NEXT: srli t2, t2, 1 +; RV32IM-NEXT: add a5, t2, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: mul a7, a5, a7 +; RV32IM-NEXT: sub a2, a2, a7 ; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a5, a2, t0 -; RV32IM-NEXT: sub a5, a6, a5 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: add a3, t2, a3 -; RV32IM-NEXT: add a4, t1, t3 +; RV32IM-NEXT: add a3, a3, t1 +; RV32IM-NEXT: add a1, a1, t0 +; RV32IM-NEXT: add a4, a4, a6 ; RV32IM-NEXT: sh a4, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 0(a1) -; RV64I-NEXT: lhu s3, 8(a1) -; RV64I-NEXT: lhu s4, 16(a1) -; RV64I-NEXT: lhu 
s1, 24(a1) +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s1, 0(a1) +; RV64I-NEXT: lhu s2, 8(a1) +; RV64I-NEXT: lhu s3, 16(a1) +; RV64I-NEXT: lhu s4, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s5, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s6, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s7, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s8, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __udivdi3@plt -; RV64I-NEXT: mv s9, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __udivdi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __udivdi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __udivdi3@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __udivdi3@plt ; RV64I-NEXT: addw a0, s8, a0 -; RV64I-NEXT: addw a1, s7, s1 -; RV64I-NEXT: addw a2, s6, s4 -; RV64I-NEXT: addw a3, s5, s9 +; RV64I-NEXT: addw a1, s7, s2 +; RV64I-NEXT: addw a2, s6, s3 +; RV64I-NEXT: addw a3, s5, s4 ; RV64I-NEXT: sh a3, 6(s0) ; 
RV64I-NEXT: sh a2, 4(s0) ; RV64I-NEXT: sh a1, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) -; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: combine_urem_udiv: @@ -567,45 +555,45 @@ ; RV64IM-NEXT: lhu a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) -; RV64IM-NEXT: lhu a6, 0(a1) -; RV64IM-NEXT: lhu a7, 8(a1) +; RV64IM-NEXT: lhu a4, 0(a1) +; RV64IM-NEXT: lhu a5, 8(a1) ; RV64IM-NEXT: lhu a1, 16(a1) -; RV64IM-NEXT: mulhu a4, a2, a3 -; RV64IM-NEXT: sub a5, a2, a4 -; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli t3, a4, 6 -; RV64IM-NEXT: li t0, 95 -; RV64IM-NEXT: mulw a5, t3, t0 -; RV64IM-NEXT: subw t1, a2, a5 -; RV64IM-NEXT: mulhu a5, a1, a3 -; RV64IM-NEXT: sub a2, a1, a5 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a5, a2, t0 
-; RV64IM-NEXT: subw t2, a1, a5 -; RV64IM-NEXT: mulhu a5, a7, a3 -; RV64IM-NEXT: sub a1, a7, a5 -; RV64IM-NEXT: srli a1, a1, 1 -; RV64IM-NEXT: add a1, a1, a5 -; RV64IM-NEXT: srli a1, a1, 6 -; RV64IM-NEXT: mulw a5, a1, t0 -; RV64IM-NEXT: subw a5, a7, a5 -; RV64IM-NEXT: mulhu a3, a6, a3 -; RV64IM-NEXT: sub a4, a6, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: mulhu a6, a2, a3 +; RV64IM-NEXT: sub a7, a2, a6 +; RV64IM-NEXT: srli a7, a7, 1 +; RV64IM-NEXT: add a6, a7, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw t0, a6, a7 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulhu t0, a1, a3 +; RV64IM-NEXT: sub t1, a1, t0 +; RV64IM-NEXT: srli t1, t1, 1 +; RV64IM-NEXT: add t0, t1, t0 +; RV64IM-NEXT: srli t0, t0, 6 +; RV64IM-NEXT: mulw t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulhu t1, a5, a3 +; RV64IM-NEXT: sub t2, a5, t1 +; RV64IM-NEXT: srli t2, t2, 1 +; RV64IM-NEXT: add t1, t2, t1 +; RV64IM-NEXT: srli t1, t1, 6 +; RV64IM-NEXT: mulw t2, t1, a7 +; RV64IM-NEXT: subw a5, a5, t2 +; RV64IM-NEXT: mulhu a3, a4, a3 +; RV64IM-NEXT: sub t2, a4, a3 +; RV64IM-NEXT: srli t2, t2, 1 +; RV64IM-NEXT: add a3, t2, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: mulw a4, a3, t0 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: mulw a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 ; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: addw a1, a5, a1 -; RV64IM-NEXT: addw a2, t2, a2 -; RV64IM-NEXT: addw a4, t1, t3 -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: addw a4, a5, t1 +; RV64IM-NEXT: addw a1, a1, t0 +; RV64IM-NEXT: addw a2, a2, a6 +; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, @@ -624,17 +612,17 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: lhu s2, 8(a1) -; RV32I-NEXT: lhu s3, 4(a1) -; RV32I-NEXT: lhu s1, 0(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) +; RV32I-NEXT: lhu s3, 0(a1) ; RV32I-NEXT: lhu a2, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: andi a1, s1, 63 -; RV32I-NEXT: andi a2, s3, 31 -; RV32I-NEXT: andi a3, s2, 7 +; RV32I-NEXT: andi a1, s3, 63 +; RV32I-NEXT: andi a2, s2, 31 +; RV32I-NEXT: andi a3, s1, 7 ; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: sh a3, 4(s0) ; RV32I-NEXT: sh a2, 2(s0) @@ -649,27 +637,27 @@ ; ; RV32IM-LABEL: dont_fold_urem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 8(a1) +; RV32IM-NEXT: lhu a2, 8(a1) ; RV32IM-NEXT: lhu a3, 4(a1) ; RV32IM-NEXT: lhu a4, 12(a1) ; RV32IM-NEXT: lhu a1, 0(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 ; RV32IM-NEXT: mulhu a5, a4, a5 -; RV32IM-NEXT: sub a2, a4, a5 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 +; RV32IM-NEXT: sub a6, a4, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: andi a1, a1, 63 ; RV32IM-NEXT: andi a3, a3, 31 -; RV32IM-NEXT: andi a4, a6, 7 -; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: andi a2, a2, 7 +; RV32IM-NEXT: sh a2, 4(a0) ; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) -; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_urem_power_of_two: @@ -680,17 +668,17 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 16(a1) -; RV64I-NEXT: lhu s3, 8(a1) -; RV64I-NEXT: lhu s1, 0(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 
8(a1) +; RV64I-NEXT: lhu s3, 0(a1) ; RV64I-NEXT: lhu a2, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: andi a1, s1, 63 -; RV64I-NEXT: andi a2, s3, 31 -; RV64I-NEXT: andi a3, s2, 7 +; RV64I-NEXT: andi a1, s3, 63 +; RV64I-NEXT: andi a2, s2, 31 +; RV64I-NEXT: andi a3, s1, 7 ; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: sh a3, 4(s0) ; RV64I-NEXT: sh a2, 2(s0) @@ -708,20 +696,20 @@ ; RV64IM-NEXT: lhu a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI3_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3) -; RV64IM-NEXT: lhu a6, 16(a1) +; RV64IM-NEXT: lhu a4, 16(a1) ; RV64IM-NEXT: lhu a5, 8(a1) ; RV64IM-NEXT: lhu a1, 0(a1) ; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: sub a4, a2, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: li a4, 95 -; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: andi a1, a1, 63 ; RV64IM-NEXT: andi a3, a5, 31 -; RV64IM-NEXT: andi a4, a6, 7 +; RV64IM-NEXT: andi a4, a4, 7 ; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) ; RV64IM-NEXT: sh a1, 0(a0) @@ -741,10 +729,10 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 12(a1) +; RV32I-NEXT: lhu s0, 12(a1) ; RV32I-NEXT: lhu s1, 8(a1) ; RV32I-NEXT: lhu a2, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt @@ -755,12 +743,12 @@ ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh zero, 
0(s0) +; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -811,10 +799,10 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) +; RV64I-NEXT: lhu s0, 24(a1) ; RV64I-NEXT: lhu s1, 16(a1) ; RV64I-NEXT: lhu a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt @@ -825,12 +813,12 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -851,29 +839,29 @@ ; RV64IM-NEXT: srli a5, a5, 1 ; RV64IM-NEXT: add a3, a5, a3 ; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) -; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) -; RV64IM-NEXT: mulw a3, a3, a6 -; RV64IM-NEXT: subw a6, a2, a3 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI4_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI4_1)(a6) +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 1 -; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: mulhu a3, a3, a6 ; RV64IM-NEXT: srli a3, a3, 7 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) -; RV64IM-NEXT: li a2, 654 -; RV64IM-NEXT: mulw a2, a3, a2 -; RV64IM-NEXT: subw a1, 
a1, a2 -; RV64IM-NEXT: mulhu a2, a4, a5 -; RV64IM-NEXT: srli a2, a2, 12 -; RV64IM-NEXT: lui a3, 1 -; RV64IM-NEXT: addiw a3, a3, 1327 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw a2, a4, a2 +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: srli a3, a3, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a6, 4(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -903,16 +891,15 @@ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) -; RV32I-NEXT: lw s4, 16(a1) -; RV32I-NEXT: lw s5, 20(a1) -; RV32I-NEXT: lw s6, 8(a1) -; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw s0, 24(a1) +; RV32I-NEXT: lw s1, 28(a1) +; RV32I-NEXT: lw s2, 16(a1) +; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s4, 8(a1) +; RV32I-NEXT: lw s5, 12(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: li a3, 0 @@ -920,33 +907,33 @@ ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s6 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __umoddi3@plt -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv s9, a1 -; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: li a2, 23 ; 
RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3@plt -; RV32I-NEXT: sw a1, 28(s0) -; RV32I-NEXT: sw a0, 24(s0) -; RV32I-NEXT: sw s1, 20(s0) -; RV32I-NEXT: sw s4, 16(s0) -; RV32I-NEXT: sw s9, 12(s0) -; RV32I-NEXT: sw s6, 8(s0) -; RV32I-NEXT: sw s8, 4(s0) -; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __umoddi3@plt +; RV32I-NEXT: sw a1, 28(s6) +; RV32I-NEXT: sw a0, 24(s6) +; RV32I-NEXT: sw s3, 20(s6) +; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s5, 12(s6) +; RV32I-NEXT: sw s4, 8(s6) +; RV32I-NEXT: sw s8, 4(s6) +; RV32I-NEXT: sw s7, 0(s6) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -957,7 +944,6 @@ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; @@ -974,16 +960,15 @@ ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) -; RV32IM-NEXT: lw s4, 16(a1) -; RV32IM-NEXT: lw s5, 20(a1) -; RV32IM-NEXT: lw s6, 8(a1) -; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw s0, 24(a1) +; RV32IM-NEXT: lw s1, 28(a1) +; RV32IM-NEXT: lw s2, 16(a1) +; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s4, 8(a1) +; RV32IM-NEXT: lw s5, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) -; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: li a3, 0 @@ -991,33 +976,33 @@ ; 
RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s6 -; RV32IM-NEXT: mv a1, s1 -; RV32IM-NEXT: li a3, 0 -; RV32IM-NEXT: call __umoddi3@plt -; RV32IM-NEXT: mv s6, a0 -; RV32IM-NEXT: mv s9, a1 -; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s4 ; RV32IM-NEXT: mv a1, s5 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3@plt ; RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s1, a1 -; RV32IM-NEXT: lui a0, 1 -; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s2 ; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3@plt -; RV32IM-NEXT: sw a1, 28(s0) -; RV32IM-NEXT: sw a0, 24(s0) -; RV32IM-NEXT: sw s1, 20(s0) -; RV32IM-NEXT: sw s4, 16(s0) -; RV32IM-NEXT: sw s9, 12(s0) -; RV32IM-NEXT: sw s6, 8(s0) -; RV32IM-NEXT: sw s8, 4(s0) -; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: mv s2, a0 +; RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: li a3, 0 +; RV32IM-NEXT: call __umoddi3@plt +; RV32IM-NEXT: sw a1, 28(s6) +; RV32IM-NEXT: sw a0, 24(s6) +; RV32IM-NEXT: sw s3, 20(s6) +; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s5, 12(s6) +; RV32IM-NEXT: sw s4, 8(s6) +; RV32IM-NEXT: sw s8, 4(s6) +; RV32IM-NEXT: sw s7, 0(s6) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1028,7 +1013,6 @@ ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 48 ; RV32IM-NEXT: ret ; @@ -1040,10 +1024,10 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld 
s0, 24(a1) ; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt @@ -1054,12 +1038,12 @@ ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sd a0, 24(s0) -; RV64I-NEXT: sd s1, 16(s0) -; RV64I-NEXT: sd s3, 8(s0) -; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd s1, 16(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1080,29 +1064,29 @@ ; RV64IM-NEXT: srli a5, a5, 1 ; RV64IM-NEXT: add a3, a5, a3 ; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) -; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI6_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI6_1)(a6) +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 1 -; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: mulhu a3, a3, a6 ; RV64IM-NEXT: srli a3, a3, 7 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) -; RV64IM-NEXT: li a2, 654 -; RV64IM-NEXT: mul a2, a3, a2 -; RV64IM-NEXT: sub a1, a1, a2 -; RV64IM-NEXT: mulhu a2, a4, a5 -; RV64IM-NEXT: srli a2, a2, 12 -; RV64IM-NEXT: lui a3, 1 -; RV64IM-NEXT: addiw a3, a3, 1327 -; RV64IM-NEXT: mul a2, a2, a3 -; RV64IM-NEXT: sub a2, a4, a2 +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mul a3, a3, a6 +; RV64IM-NEXT: sub a1, a1, a3 +; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: srli a3, a3, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a3, a4, a3 ; RV64IM-NEXT: sd zero, 
0(a0) -; RV64IM-NEXT: sd a2, 24(a0) +; RV64IM-NEXT: sd a3, 24(a0) ; RV64IM-NEXT: sd a1, 8(a0) -; RV64IM-NEXT: sd a6, 16(a0) +; RV64IM-NEXT: sd a2, 16(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -16,18 +16,18 @@ ; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: and a2, a0, a2 ; RV32-NEXT: slli a3, a2, 16 -; RV32-NEXT: srai a6, a3, 24 +; RV32-NEXT: srai a3, a3, 24 ; RV32-NEXT: slli a4, a0, 24 -; RV32-NEXT: srai a3, a4, 24 +; RV32-NEXT: srai a6, a4, 24 ; RV32-NEXT: slli a4, a0, 8 ; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bgtz a3, .LBB0_2 +; RV32-NEXT: bgtz a6, .LBB0_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a5, 0 ; RV32-NEXT: .LBB0_2: ; RV32-NEXT: srai a4, a4, 24 ; RV32-NEXT: andi a5, a5, 255 -; RV32-NEXT: bgtz a6, .LBB0_4 +; RV32-NEXT: bgtz a3, .LBB0_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a2, 0 ; RV32-NEXT: j .LBB0_5 @@ -54,18 +54,18 @@ ; RV64-NEXT: addiw a2, a2, -256 ; RV64-NEXT: and a2, a0, a2 ; RV64-NEXT: slli a3, a2, 48 -; RV64-NEXT: srai a6, a3, 56 +; RV64-NEXT: srai a3, a3, 56 ; RV64-NEXT: slli a4, a0, 56 -; RV64-NEXT: srai a3, a4, 56 +; RV64-NEXT: srai a6, a4, 56 ; RV64-NEXT: slli a4, a0, 40 ; RV64-NEXT: mv a5, a0 -; RV64-NEXT: bgtz a3, .LBB0_2 +; RV64-NEXT: bgtz a6, .LBB0_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a5, 0 ; RV64-NEXT: .LBB0_2: ; RV64-NEXT: srai a4, a4, 56 ; RV64-NEXT: andi a5, a5, 255 -; RV64-NEXT: bgtz a6, .LBB0_4 +; RV64-NEXT: bgtz a3, .LBB0_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a2, 0 ; RV64-NEXT: j .LBB0_5 diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -567,9 +567,9 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32-LABEL: ssubo.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: sltu a6, a0, a2 -; 
RV32-NEXT: sub a5, a1, a3 -; RV32-NEXT: sub a5, a5, a6 +; RV32-NEXT: sltu a5, a0, a2 +; RV32-NEXT: sub a6, a1, a3 +; RV32-NEXT: sub a5, a6, a5 ; RV32-NEXT: xor a6, a1, a5 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: and a1, a1, a6 @@ -591,9 +591,9 @@ ; ; RV32ZBA-LABEL: ssubo.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: sltu a6, a0, a2 -; RV32ZBA-NEXT: sub a5, a1, a3 -; RV32ZBA-NEXT: sub a5, a5, a6 +; RV32ZBA-NEXT: sltu a5, a0, a2 +; RV32ZBA-NEXT: sub a6, a1, a3 +; RV32ZBA-NEXT: sub a5, a6, a5 ; RV32ZBA-NEXT: xor a6, a1, a5 ; RV32ZBA-NEXT: xor a1, a1, a3 ; RV32ZBA-NEXT: and a1, a1, a6 @@ -905,64 +905,58 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 ; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 -; RV32-NEXT: .cfi_offset s3, -16 -; RV32-NEXT: mulhu a6, a0, a2 -; RV32-NEXT: mul a5, a1, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu a7, a6, a5 -; RV32-NEXT: mulhu a5, a1, a2 -; RV32-NEXT: add a7, a5, a7 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu t0, a6, a5 -; RV32-NEXT: mulhu a5, a0, a3 -; RV32-NEXT: add a5, a5, t0 -; RV32-NEXT: add t0, a7, a5 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a5, t1, t0 +; RV32-NEXT: mulhu a5, a0, a2 +; RV32-NEXT: mul a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: sltu a6, a5, a6 +; RV32-NEXT: mulhu a7, a1, a2 +; RV32-NEXT: add a6, a7, a6 +; RV32-NEXT: mul a7, a0, a3 +; RV32-NEXT: add a5, a7, a5 +; RV32-NEXT: sltu a7, a5, a7 +; RV32-NEXT: mulhu t0, a0, a3 +; RV32-NEXT: add a7, t0, a7 +; RV32-NEXT: add a7, a6, a7 +; RV32-NEXT: mul t0, a1, a3 +; RV32-NEXT: add t1, t0, a7 ; RV32-NEXT: srai t2, a1, 31 ; RV32-NEXT: mul t3, a2, t2 ; RV32-NEXT: srai t4, a3, 31 ; RV32-NEXT: mul t5, t4, a0 ; RV32-NEXT: add t6, t5, t3 -; RV32-NEXT: add s3, a5, t6 -; RV32-NEXT: sltu s2, 
s3, a5 -; RV32-NEXT: sltu a5, a5, t1 -; RV32-NEXT: sltu s1, t0, a7 -; RV32-NEXT: mulhu s0, a1, a3 -; RV32-NEXT: add s1, s0, s1 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: mulhu s1, a2, t2 -; RV32-NEXT: add s1, s1, t3 +; RV32-NEXT: add s0, t1, t6 +; RV32-NEXT: sltu s1, s0, t1 +; RV32-NEXT: sltu t0, t1, t0 +; RV32-NEXT: sltu a6, a7, a6 +; RV32-NEXT: mulhu a7, a1, a3 +; RV32-NEXT: add a6, a7, a6 +; RV32-NEXT: add a6, a6, t0 +; RV32-NEXT: mulhu a7, a2, t2 +; RV32-NEXT: add a7, a7, t3 ; RV32-NEXT: mul a3, a3, t2 -; RV32-NEXT: add a3, s1, a3 +; RV32-NEXT: add a3, a7, a3 ; RV32-NEXT: mul a1, t4, a1 -; RV32-NEXT: mulhu s1, t4, a0 -; RV32-NEXT: add a1, s1, a1 +; RV32-NEXT: mulhu a7, t4, a0 +; RV32-NEXT: add a1, a7, a1 ; RV32-NEXT: add a1, a1, t5 ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a3, t6, t5 ; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a1, a5, a1 -; RV32-NEXT: add a1, a1, s2 -; RV32-NEXT: srai a3, a6, 31 +; RV32-NEXT: add a1, a6, a1 +; RV32-NEXT: add a1, a1, s1 +; RV32-NEXT: srai a3, a5, 31 ; RV32-NEXT: xor a1, a1, a3 -; RV32-NEXT: xor a3, s3, a3 +; RV32-NEXT: xor a3, s0, a3 ; RV32-NEXT: or a1, a3, a1 ; RV32-NEXT: snez a1, a1 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: sw a0, 0(a4) -; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: sw a5, 4(a4) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -982,64 +976,58 @@ ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s3, 0(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 ; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 -; RV32ZBA-NEXT: .cfi_offset s3, -16 -; RV32ZBA-NEXT: mulhu a6, a0, a2 -; RV32ZBA-NEXT: mul a5, a1, 
a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu a7, a6, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 -; RV32ZBA-NEXT: add a7, a5, a7 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu t0, a6, a5 -; RV32ZBA-NEXT: mulhu a5, a0, a3 -; RV32ZBA-NEXT: add a5, a5, t0 -; RV32ZBA-NEXT: add t0, a7, a5 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a5, t1, t0 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: mul a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: sltu a6, a5, a6 +; RV32ZBA-NEXT: mulhu a7, a1, a2 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: mul a7, a0, a3 +; RV32ZBA-NEXT: add a5, a7, a5 +; RV32ZBA-NEXT: sltu a7, a5, a7 +; RV32ZBA-NEXT: mulhu t0, a0, a3 +; RV32ZBA-NEXT: add a7, t0, a7 +; RV32ZBA-NEXT: add a7, a6, a7 +; RV32ZBA-NEXT: mul t0, a1, a3 +; RV32ZBA-NEXT: add t1, t0, a7 ; RV32ZBA-NEXT: srai t2, a1, 31 ; RV32ZBA-NEXT: mul t3, a2, t2 ; RV32ZBA-NEXT: srai t4, a3, 31 ; RV32ZBA-NEXT: mul t5, t4, a0 ; RV32ZBA-NEXT: add t6, t5, t3 -; RV32ZBA-NEXT: add s3, a5, t6 -; RV32ZBA-NEXT: sltu s2, s3, a5 -; RV32ZBA-NEXT: sltu a5, a5, t1 -; RV32ZBA-NEXT: sltu s1, t0, a7 -; RV32ZBA-NEXT: mulhu s0, a1, a3 -; RV32ZBA-NEXT: add s1, s0, s1 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: mulhu s1, a2, t2 -; RV32ZBA-NEXT: add s1, s1, t3 +; RV32ZBA-NEXT: add s0, t1, t6 +; RV32ZBA-NEXT: sltu s1, s0, t1 +; RV32ZBA-NEXT: sltu t0, t1, t0 +; RV32ZBA-NEXT: sltu a6, a7, a6 +; RV32ZBA-NEXT: mulhu a7, a1, a3 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: add a6, a6, t0 +; RV32ZBA-NEXT: mulhu a7, a2, t2 +; RV32ZBA-NEXT: add a7, a7, t3 ; RV32ZBA-NEXT: mul a3, a3, t2 -; RV32ZBA-NEXT: add a3, s1, a3 +; RV32ZBA-NEXT: add a3, a7, a3 ; RV32ZBA-NEXT: mul a1, t4, a1 -; RV32ZBA-NEXT: mulhu s1, t4, a0 -; RV32ZBA-NEXT: add a1, s1, a1 +; RV32ZBA-NEXT: mulhu a7, t4, a0 +; RV32ZBA-NEXT: add a1, a7, a1 ; RV32ZBA-NEXT: add a1, a1, t5 ; RV32ZBA-NEXT: add a1, a1, a3 ; RV32ZBA-NEXT: sltu a3, t6, t5 ; RV32ZBA-NEXT: add a1, a1, a3 -; RV32ZBA-NEXT: add 
a1, a5, a1 -; RV32ZBA-NEXT: add a1, a1, s2 -; RV32ZBA-NEXT: srai a3, a6, 31 +; RV32ZBA-NEXT: add a1, a6, a1 +; RV32ZBA-NEXT: add a1, a1, s1 +; RV32ZBA-NEXT: srai a3, a5, 31 ; RV32ZBA-NEXT: xor a1, a1, a3 -; RV32ZBA-NEXT: xor a3, s3, a3 +; RV32ZBA-NEXT: xor a3, s0, a3 ; RV32ZBA-NEXT: or a1, a3, a1 ; RV32ZBA-NEXT: snez a1, a1 ; RV32ZBA-NEXT: mul a0, a0, a2 ; RV32ZBA-NEXT: sw a0, 0(a4) -; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) ; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s3, 0(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -1063,27 +1051,27 @@ define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { ; RV32-LABEL: smulo2.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: li a7, 13 -; RV32-NEXT: mulhu a4, a0, a7 -; RV32-NEXT: mul a5, a1, a7 -; RV32-NEXT: add t0, a5, a4 -; RV32-NEXT: sltu a6, t0, a5 -; RV32-NEXT: mulhu a5, a1, a7 -; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mulhu a4, a0, a3 +; RV32-NEXT: mul a5, a1, a3 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 ; RV32-NEXT: srai a1, a1, 31 -; RV32-NEXT: mul a3, a1, a7 -; RV32-NEXT: add a3, a5, a3 -; RV32-NEXT: srai a4, t0, 31 -; RV32-NEXT: xor a6, a3, a4 -; RV32-NEXT: sltu a3, a3, a5 -; RV32-NEXT: mulh a1, a1, a7 -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: xor a1, a1, a4 -; RV32-NEXT: or a1, a6, a1 +; RV32-NEXT: mul a6, a1, a3 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: srai a7, a4, 31 +; RV32-NEXT: xor t0, a6, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulh a1, a1, a3 +; RV32-NEXT: add a1, a1, a5 +; RV32-NEXT: xor a1, a1, a7 +; RV32-NEXT: or a1, t0, a1 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: mul a0, a0, a7 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: sw a0, 0(a2) -; RV32-NEXT: sw t0, 4(a2) +; RV32-NEXT: sw a4, 4(a2) ; 
RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; @@ -1100,27 +1088,27 @@ ; ; RV32ZBA-LABEL: smulo2.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: li a7, 13 -; RV32ZBA-NEXT: mulhu a4, a0, a7 -; RV32ZBA-NEXT: mul a5, a1, a7 -; RV32ZBA-NEXT: add t0, a5, a4 -; RV32ZBA-NEXT: sltu a6, t0, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a7 -; RV32ZBA-NEXT: add a5, a5, a6 +; RV32ZBA-NEXT: li a3, 13 +; RV32ZBA-NEXT: mulhu a4, a0, a3 +; RV32ZBA-NEXT: mul a5, a1, a3 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 ; RV32ZBA-NEXT: srai a1, a1, 31 -; RV32ZBA-NEXT: mul a3, a1, a7 -; RV32ZBA-NEXT: add a3, a5, a3 -; RV32ZBA-NEXT: srai a4, t0, 31 -; RV32ZBA-NEXT: xor a6, a3, a4 -; RV32ZBA-NEXT: sltu a3, a3, a5 -; RV32ZBA-NEXT: mulh a1, a1, a7 -; RV32ZBA-NEXT: add a1, a1, a3 -; RV32ZBA-NEXT: xor a1, a1, a4 -; RV32ZBA-NEXT: or a1, a6, a1 +; RV32ZBA-NEXT: mul a6, a1, a3 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: srai a7, a4, 31 +; RV32ZBA-NEXT: xor t0, a6, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulh a1, a1, a3 +; RV32ZBA-NEXT: add a1, a1, a5 +; RV32ZBA-NEXT: xor a1, a1, a7 +; RV32ZBA-NEXT: or a1, t0, a1 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: mul a0, a0, a7 +; RV32ZBA-NEXT: mul a0, a0, a3 ; RV32ZBA-NEXT: sw a0, 0(a2) -; RV32ZBA-NEXT: sw t0, 4(a2) +; RV32ZBA-NEXT: sw a4, 4(a2) ; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: ret ; @@ -1289,25 +1277,25 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32-LABEL: umulo.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: mul a6, a3, a0 -; RV32-NEXT: mul a5, a1, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mulhu a5, a0, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu a7, a6, a5 -; RV32-NEXT: snez t0, a3 -; RV32-NEXT: snez a5, a1 -; RV32-NEXT: and a5, a5, t0 +; RV32-NEXT: mul a5, a3, a0 +; RV32-NEXT: mul a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mulhu a6, a0, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: sltu a6, a5, a6 +; RV32-NEXT: 
snez a7, a3 +; RV32-NEXT: snez t0, a1 +; RV32-NEXT: and a7, t0, a7 ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a5, a1 +; RV32-NEXT: or a1, a7, a1 ; RV32-NEXT: mulhu a3, a3, a0 ; RV32-NEXT: snez a3, a3 ; RV32-NEXT: or a1, a1, a3 -; RV32-NEXT: or a1, a1, a7 +; RV32-NEXT: or a1, a1, a6 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: sw a0, 0(a4) -; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: sw a5, 4(a4) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; @@ -1322,25 +1310,25 @@ ; ; RV32ZBA-LABEL: umulo.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: mul a6, a3, a0 -; RV32ZBA-NEXT: mul a5, a1, a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mulhu a5, a0, a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu a7, a6, a5 -; RV32ZBA-NEXT: snez t0, a3 -; RV32ZBA-NEXT: snez a5, a1 -; RV32ZBA-NEXT: and a5, a5, t0 +; RV32ZBA-NEXT: mul a5, a3, a0 +; RV32ZBA-NEXT: mul a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a0, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: sltu a6, a5, a6 +; RV32ZBA-NEXT: snez a7, a3 +; RV32ZBA-NEXT: snez t0, a1 +; RV32ZBA-NEXT: and a7, t0, a7 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a5, a1 +; RV32ZBA-NEXT: or a1, a7, a1 ; RV32ZBA-NEXT: mulhu a3, a3, a0 ; RV32ZBA-NEXT: snez a3, a3 ; RV32ZBA-NEXT: or a1, a1, a3 -; RV32ZBA-NEXT: or a1, a1, a7 +; RV32ZBA-NEXT: or a1, a1, a6 ; RV32ZBA-NEXT: mul a0, a0, a2 ; RV32ZBA-NEXT: sw a0, 0(a4) -; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) ; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: ret ; @@ -2340,62 +2328,56 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: 
sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu s1, a2, a5 -; RV32-NEXT: add s1, s1, t2 -; RV32-NEXT: mul a5, a3, a5 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: mul s1, t3, a1 -; RV32-NEXT: mulhu s0, t3, a0 -; RV32-NEXT: add s1, s0, s1 -; RV32-NEXT: add s1, s1, t4 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: sltu s1, t5, t4 -; RV32-NEXT: add a5, a5, s1 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add a4, a4, s2 -; RV32-NEXT: srai a5, a7, 31 -; RV32-NEXT: xor a4, a4, a5 -; RV32-NEXT: xor a5, t6, a5 -; RV32-NEXT: or a4, a5, a4 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a6, a2, t1 +; RV32-NEXT: add a6, a6, t2 +; RV32-NEXT: mul a7, a3, t1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: mul a7, t3, a1 +; RV32-NEXT: mulhu t0, t3, a0 +; RV32-NEXT: add a7, t0, a7 +; RV32-NEXT: add a7, a7, t4 +; RV32-NEXT: add a6, a7, a6 +; 
RV32-NEXT: sltu a7, t5, t4 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, a5, s0 +; RV32-NEXT: srai a4, a4, 31 +; RV32-NEXT: xor a5, a5, a4 +; RV32-NEXT: xor a4, t6, a4 +; RV32-NEXT: or a4, a4, a5 ; RV32-NEXT: bnez a4, .LBB44_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB44_2: # %entry ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2415,62 +2397,56 @@ ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; 
RV32ZBA-NEXT: sltu a4, a4, t1 -; RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu s1, a2, a5 -; RV32ZBA-NEXT: add s1, s1, t2 -; RV32ZBA-NEXT: mul a5, a3, a5 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: mul s1, t3, a1 -; RV32ZBA-NEXT: mulhu s0, t3, a0 -; RV32ZBA-NEXT: add s1, s0, s1 -; RV32ZBA-NEXT: add s1, s1, t4 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: sltu s1, t5, t4 -; RV32ZBA-NEXT: add a5, a5, s1 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add a4, a4, s2 -; RV32ZBA-NEXT: srai a5, a7, 31 -; RV32ZBA-NEXT: xor a4, a4, a5 -; RV32ZBA-NEXT: xor a5, t6, a5 -; RV32ZBA-NEXT: or a4, a5, a4 +; RV32ZBA-NEXT: add t6, t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a6, a2, t1 +; RV32ZBA-NEXT: add a6, a6, t2 +; RV32ZBA-NEXT: mul a7, a3, t1 +; RV32ZBA-NEXT: add a6, a6, a7 +; RV32ZBA-NEXT: mul a7, t3, a1 +; RV32ZBA-NEXT: mulhu t0, t3, a0 +; RV32ZBA-NEXT: add a7, t0, a7 +; RV32ZBA-NEXT: add a7, a7, t4 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: sltu a7, t5, t4 +; RV32ZBA-NEXT: add a6, a6, a7 +; RV32ZBA-NEXT: add a5, a5, a6 +; RV32ZBA-NEXT: add a5, a5, s0 +; RV32ZBA-NEXT: srai a4, a4, 31 +; RV32ZBA-NEXT: xor a5, a5, a4 +; RV32ZBA-NEXT: xor a4, t6, a4 +; RV32ZBA-NEXT: or a4, a4, a5 ; RV32ZBA-NEXT: bnez a4, .LBB44_2 ; RV32ZBA-NEXT: # %bb.1: # %entry ; RV32ZBA-NEXT: mv a0, a2 ; RV32ZBA-NEXT: mv a1, a3 ; RV32ZBA-NEXT: .LBB44_2: # %entry ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -2497,40 +2473,36 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; 
RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a2, a2, t1 ; RV32-NEXT: add a2, a2, t2 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: mul a3, a3, t1 ; RV32-NEXT: add a2, a2, a3 ; RV32-NEXT: mul a1, t3, a1 ; RV32-NEXT: mulhu a0, t3, a0 @@ -2539,16 +2511,14 @@ ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: sltu a1, t5, t4 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, a4, a0 -; RV32-NEXT: add a0, a0, s2 -; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add 
a0, a0, s0 +; RV32-NEXT: srai a1, a4, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t6, a1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2566,40 +2536,36 @@ ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; RV32ZBA-NEXT: sltu a4, a4, t1 -; RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add t6, 
t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a2, a2, t1 ; RV32ZBA-NEXT: add a2, a2, t2 -; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: mul a3, a3, t1 ; RV32ZBA-NEXT: add a2, a2, a3 ; RV32ZBA-NEXT: mul a1, t3, a1 ; RV32ZBA-NEXT: mulhu a0, t3, a0 @@ -2608,16 +2574,14 @@ ; RV32ZBA-NEXT: add a0, a0, a2 ; RV32ZBA-NEXT: sltu a1, t5, t4 ; RV32ZBA-NEXT: add a0, a0, a1 -; RV32ZBA-NEXT: add a0, a4, a0 -; RV32ZBA-NEXT: add a0, a0, s2 -; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: add a0, a5, a0 +; RV32ZBA-NEXT: add a0, a0, s0 +; RV32ZBA-NEXT: srai a1, a4, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t6, a1 ; RV32ZBA-NEXT: or a0, a1, a0 ; RV32ZBA-NEXT: seqz a0, a0 ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -2730,17 +2694,17 @@ ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 -; RV32-NEXT: snez a5, a5 -; RV32-NEXT: or a4, a4, a5 -; RV32-NEXT: mulhu a5, a3, a0 -; RV32-NEXT: snez a5, a5 -; RV32-NEXT: or a4, a4, a5 -; RV32-NEXT: or a4, a4, a6 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: snez a6, a6 +; RV32-NEXT: or a5, a5, a6 +; RV32-NEXT: mulhu a6, a3, a0 +; RV32-NEXT: snez a6, a6 +; RV32-NEXT: or a5, a5, a6 +; RV32-NEXT: or a4, a5, a4 ; RV32-NEXT: bnez a4, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 @@ -2764,17 +2728,17 @@ ; RV32ZBA-NEXT: add a4, a5, a4 ; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu 
a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 -; RV32ZBA-NEXT: snez a5, a5 -; RV32ZBA-NEXT: or a4, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a3, a0 -; RV32ZBA-NEXT: snez a5, a5 -; RV32ZBA-NEXT: or a4, a4, a5 -; RV32ZBA-NEXT: or a4, a4, a6 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: snez a6, a6 +; RV32ZBA-NEXT: or a5, a5, a6 +; RV32ZBA-NEXT: mulhu a6, a3, a0 +; RV32ZBA-NEXT: snez a6, a6 +; RV32ZBA-NEXT: or a5, a5, a6 +; RV32ZBA-NEXT: or a4, a5, a4 ; RV32ZBA-NEXT: bnez a4, .LBB48_2 ; RV32ZBA-NEXT: # %bb.1: # %entry ; RV32ZBA-NEXT: mv a0, a2 @@ -2805,17 +2769,17 @@ ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a4, a1 +; RV32-NEXT: or a1, a5, a1 ; RV32-NEXT: mulhu a0, a3, a0 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: or a0, a0, a6 +; RV32-NEXT: or a0, a0, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: ret ; @@ -2832,17 +2796,17 @@ ; RV32ZBA-NEXT: add a4, a5, a4 ; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a4, a1 +; RV32ZBA-NEXT: or a1, a5, a1 ; RV32ZBA-NEXT: mulhu a0, a3, a0 ; RV32ZBA-NEXT: snez a0, a0 ; RV32ZBA-NEXT: or a0, a1, a0 -; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: or a0, a0, a4 ; RV32ZBA-NEXT: xori a0, a0, 1 ; RV32ZBA-NEXT: ret ; @@ 
-3479,40 +3443,36 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a2, a2, t1 ; RV32-NEXT: add a2, a2, t2 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: mul a3, a3, t1 ; RV32-NEXT: add a2, a2, a3 ; RV32-NEXT: mul a1, t3, a1 ; RV32-NEXT: mulhu a0, t3, a0 @@ -3521,9 +3481,9 @@ ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: sltu a1, t5, t4 ; RV32-NEXT: add a0, a0, a1 
-; RV32-NEXT: add a0, a4, a0 -; RV32-NEXT: add a0, a0, s2 -; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: srai a1, a4, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t6, a1 ; RV32-NEXT: or a0, a1, a0 @@ -3535,8 +3495,6 @@ ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB59_3: # %overflow ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3558,40 +3516,36 @@ ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; RV32ZBA-NEXT: sltu a4, a4, t1 -; 
RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add t6, t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a2, a2, t1 ; RV32ZBA-NEXT: add a2, a2, t2 -; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: mul a3, a3, t1 ; RV32ZBA-NEXT: add a2, a2, a3 ; RV32ZBA-NEXT: mul a1, t3, a1 ; RV32ZBA-NEXT: mulhu a0, t3, a0 @@ -3600,9 +3554,9 @@ ; RV32ZBA-NEXT: add a0, a0, a2 ; RV32ZBA-NEXT: sltu a1, t5, t4 ; RV32ZBA-NEXT: add a0, a0, a1 -; RV32ZBA-NEXT: add a0, a4, a0 -; RV32ZBA-NEXT: add a0, a0, s2 -; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: add a0, a5, a0 +; RV32ZBA-NEXT: add a0, a0, s0 +; RV32ZBA-NEXT: srai a1, a4, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t6, a1 ; RV32ZBA-NEXT: or a0, a1, a0 @@ -3614,8 +3568,6 @@ ; RV32ZBA-NEXT: li a0, 1 ; RV32ZBA-NEXT: .LBB59_3: # %overflow ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -3647,41 +3599,41 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32-LABEL: smulo2.br.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: li a6, -13 -; RV32-NEXT: mulhu a3, a0, a6 -; RV32-NEXT: mul a4, a1, a6 +; RV32-NEXT: li a2, -13 +; RV32-NEXT: mulhu a3, a0, a2 +; RV32-NEXT: mul a4, a1, a2 ; RV32-NEXT: add a3, a4, a3 ; RV32-NEXT: sltu a4, a3, a4 -; RV32-NEXT: mulhu a5, a1, a6 -; RV32-NEXT: add t3, a5, a4 -; RV32-NEXT: sub t0, a3, a0 -; RV32-NEXT: neg t1, a0 -; RV32-NEXT: sltu a2, t0, t1 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sub a3, a3, a0 +; RV32-NEXT: neg a5, a0 +; RV32-NEXT: sltu a6, a3, a5 ; RV32-NEXT: li a7, -1 -; RV32-NEXT: mulhu t2, 
a0, a7 -; RV32-NEXT: add a2, t2, a2 -; RV32-NEXT: add a2, t3, a2 -; RV32-NEXT: sub a5, a2, a1 -; RV32-NEXT: srai t6, a1, 31 -; RV32-NEXT: mul a4, t6, a6 -; RV32-NEXT: sub a4, a4, a0 -; RV32-NEXT: add t4, a5, a4 -; RV32-NEXT: sltu t5, t4, a5 -; RV32-NEXT: neg a3, a1 -; RV32-NEXT: sltu a3, a5, a3 -; RV32-NEXT: sltu a2, a2, t3 -; RV32-NEXT: mulhu a5, a1, a7 -; RV32-NEXT: add a2, a5, a2 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: sltu a3, a4, t1 -; RV32-NEXT: mulh a4, t6, a6 -; RV32-NEXT: sub a0, t2, a0 +; RV32-NEXT: mulhu t0, a0, a7 +; RV32-NEXT: add a6, t0, a6 +; RV32-NEXT: add a6, a4, a6 +; RV32-NEXT: sub t1, a6, a1 +; RV32-NEXT: srai t2, a1, 31 +; RV32-NEXT: mul t3, t2, a2 +; RV32-NEXT: sub t3, t3, a0 +; RV32-NEXT: add t4, t1, t3 +; RV32-NEXT: sltu t5, t4, t1 +; RV32-NEXT: neg t6, a1 +; RV32-NEXT: sltu t1, t1, t6 +; RV32-NEXT: sltu a4, a6, a4 +; RV32-NEXT: mulhu a6, a1, a7 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: add a4, a4, t1 +; RV32-NEXT: sltu a5, t3, a5 +; RV32-NEXT: mulh a2, t2, a2 +; RV32-NEXT: sub a0, t0, a0 ; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, a0, a3 -; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, a4, a0 ; RV32-NEXT: add a0, a0, t5 -; RV32-NEXT: srai a1, t0, 31 +; RV32-NEXT: srai a1, a3, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t4, a1 ; RV32-NEXT: or a0, a1, a0 @@ -3709,41 +3661,41 @@ ; ; RV32ZBA-LABEL: smulo2.br.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: li a6, -13 -; RV32ZBA-NEXT: mulhu a3, a0, a6 -; RV32ZBA-NEXT: mul a4, a1, a6 +; RV32ZBA-NEXT: li a2, -13 +; RV32ZBA-NEXT: mulhu a3, a0, a2 +; RV32ZBA-NEXT: mul a4, a1, a2 ; RV32ZBA-NEXT: add a3, a4, a3 ; RV32ZBA-NEXT: sltu a4, a3, a4 -; RV32ZBA-NEXT: mulhu a5, a1, a6 -; RV32ZBA-NEXT: add t3, a5, a4 -; RV32ZBA-NEXT: sub t0, a3, a0 -; RV32ZBA-NEXT: neg t1, a0 -; RV32ZBA-NEXT: sltu a2, t0, t1 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sub a3, a3, a0 
+; RV32ZBA-NEXT: neg a5, a0 +; RV32ZBA-NEXT: sltu a6, a3, a5 ; RV32ZBA-NEXT: li a7, -1 -; RV32ZBA-NEXT: mulhu t2, a0, a7 -; RV32ZBA-NEXT: add a2, t2, a2 -; RV32ZBA-NEXT: add a2, t3, a2 -; RV32ZBA-NEXT: sub a5, a2, a1 -; RV32ZBA-NEXT: srai t6, a1, 31 -; RV32ZBA-NEXT: mul a4, t6, a6 -; RV32ZBA-NEXT: sub a4, a4, a0 -; RV32ZBA-NEXT: add t4, a5, a4 -; RV32ZBA-NEXT: sltu t5, t4, a5 -; RV32ZBA-NEXT: neg a3, a1 -; RV32ZBA-NEXT: sltu a3, a5, a3 -; RV32ZBA-NEXT: sltu a2, a2, t3 -; RV32ZBA-NEXT: mulhu a5, a1, a7 -; RV32ZBA-NEXT: add a2, a5, a2 -; RV32ZBA-NEXT: add a2, a2, a3 -; RV32ZBA-NEXT: sltu a3, a4, t1 -; RV32ZBA-NEXT: mulh a4, t6, a6 -; RV32ZBA-NEXT: sub a0, t2, a0 +; RV32ZBA-NEXT: mulhu t0, a0, a7 +; RV32ZBA-NEXT: add a6, t0, a6 +; RV32ZBA-NEXT: add a6, a4, a6 +; RV32ZBA-NEXT: sub t1, a6, a1 +; RV32ZBA-NEXT: srai t2, a1, 31 +; RV32ZBA-NEXT: mul t3, t2, a2 +; RV32ZBA-NEXT: sub t3, t3, a0 +; RV32ZBA-NEXT: add t4, t1, t3 +; RV32ZBA-NEXT: sltu t5, t4, t1 +; RV32ZBA-NEXT: neg t6, a1 +; RV32ZBA-NEXT: sltu t1, t1, t6 +; RV32ZBA-NEXT: sltu a4, a6, a4 +; RV32ZBA-NEXT: mulhu a6, a1, a7 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: add a4, a4, t1 +; RV32ZBA-NEXT: sltu a5, t3, a5 +; RV32ZBA-NEXT: mulh a2, t2, a2 +; RV32ZBA-NEXT: sub a0, t0, a0 ; RV32ZBA-NEXT: sub a0, a0, a1 -; RV32ZBA-NEXT: add a0, a0, a4 -; RV32ZBA-NEXT: add a0, a0, a3 -; RV32ZBA-NEXT: add a0, a2, a0 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: add a0, a0, a5 +; RV32ZBA-NEXT: add a0, a4, a0 ; RV32ZBA-NEXT: add a0, a0, t5 -; RV32ZBA-NEXT: srai a1, t0, 31 +; RV32ZBA-NEXT: srai a1, a3, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t4, a1 ; RV32ZBA-NEXT: or a0, a1, a0 @@ -3852,17 +3804,17 @@ ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 ; RV32-NEXT: mulhu a1, a1, a2 ; 
RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a4, a1 +; RV32-NEXT: or a1, a5, a1 ; RV32-NEXT: mulhu a0, a3, a0 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: or a0, a0, a6 +; RV32-NEXT: or a0, a0, a4 ; RV32-NEXT: beqz a0, .LBB62_2 ; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: li a0, 0 @@ -3889,17 +3841,17 @@ ; RV32ZBA-NEXT: add a4, a5, a4 ; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a4, a1 +; RV32ZBA-NEXT: or a1, a5, a1 ; RV32ZBA-NEXT: mulhu a0, a3, a0 ; RV32ZBA-NEXT: snez a0, a0 ; RV32ZBA-NEXT: or a0, a1, a0 -; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: or a0, a0, a4 ; RV32ZBA-NEXT: beqz a0, .LBB62_2 ; RV32ZBA-NEXT: # %bb.1: # %overflow ; RV32ZBA-NEXT: li a0, 0