diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -80,6 +80,7 @@ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } + bool enableMachineScheduler() const override { return true; } bool hasStdExtM() const { return HasStdExtM; } bool hasStdExtA() const { return HasStdExtA; } bool hasStdExtF() const { return HasStdExtF; } diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -96,36 +96,36 @@ ; RV32I-LABEL: add_wide_operand: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: srli a3, a2, 29 -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: slli a5, a4, 3 -; RV32I-NEXT: or a6, a5, a3 -; RV32I-NEXT: srli a4, a4, 29 -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: slli a3, a5, 3 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a1, 8(a1) +; RV32I-NEXT: srli a5, a2, 29 +; RV32I-NEXT: slli a4, a3, 3 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: srli a3, a3, 29 +; RV32I-NEXT: slli a5, a1, 3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: srli a1, a1, 29 +; RV32I-NEXT: slli a5, a6, 3 +; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: lui a5, 128 +; RV32I-NEXT: add a1, a1, a5 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 8(a0) -; RV32I-NEXT: sw a6, 4(a0) -; RV32I-NEXT: srli a2, a5, 29 -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: lui a2, 128 -; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: add_wide_operand: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: srli a2, a0, 61 +; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a0, a0, 3 ; RV64I-NEXT: addi a2, zero, 1 ; RV64I-NEXT: slli a2, a2, 51 ; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: slli a0, a0, 3 ; RV64I-NEXT: ret %1 = add i128 %a, 5192296858534827628530496329220096 %2 = shl i128 %1, 3 diff --git a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll --- a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll +++ b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll @@ -20,9 +20,9 @@ define i64 @subc_sube(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: subc_sube: ; RV32I: # %bb.0: +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sltu a3, a0, a2 -; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret %1 = sub i64 %a, %b diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll --- a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -10,17 +10,17 @@ ; RISCV32-LABEL: addcarry: ; RISCV32: # %bb.0: ; RISCV32-NEXT: mul a4, a0, a3 -; RISCV32-NEXT: mulhu a5, a0, a2 -; RISCV32-NEXT: add a4, a5, a4 -; RISCV32-NEXT: sltu a6, a4, a5 -; RISCV32-NEXT: mulhu a5, a0, a3 -; RISCV32-NEXT: add a6, a5, a6 -; RISCV32-NEXT: mulhu a5, a1, a2 -; RISCV32-NEXT: add a7, a6, a5 +; RISCV32-NEXT: mulhu a7, a0, a2 +; RISCV32-NEXT: add a4, a7, a4 ; RISCV32-NEXT: mul a5, a1, a2 ; RISCV32-NEXT: add a6, a4, a5 -; RISCV32-NEXT: sltu a4, a6, a4 -; RISCV32-NEXT: add a4, a7, a4 +; RISCV32-NEXT: sltu t0, a6, a4 +; RISCV32-NEXT: sltu a4, a4, a7 +; RISCV32-NEXT: mulhu a5, a0, a3 +; RISCV32-NEXT: add a4, a5, a4 +; RISCV32-NEXT: mulhu a5, a1, a2 +; RISCV32-NEXT: add a4, a4, a5 +; RISCV32-NEXT: add a4, a4, t0 ; RISCV32-NEXT: mul a5, a1, a3 ; RISCV32-NEXT: add a5, a4, a5 ; RISCV32-NEXT: bgez a1, .LBB0_2 @@ -31,13 +31,13 @@ ; RISCV32-NEXT: # %bb.3: ; RISCV32-NEXT: sub a5, a5, a0 ; RISCV32-NEXT: .LBB0_4: +; RISCV32-NEXT: slli a1, a5, 30 +; RISCV32-NEXT: srli a3, a6, 2 +; RISCV32-NEXT: or a1, a3, a1 +; RISCV32-NEXT: slli a3, a6, 30 ; RISCV32-NEXT: mul a0, a0, a2 ; RISCV32-NEXT: srli a0, a0, 2 -; RISCV32-NEXT: slli a1, a6, 30 -; RISCV32-NEXT: or a0, a0, a1 -; RISCV32-NEXT: srli a1, a6, 2 -; RISCV32-NEXT: slli a2, a5, 30 -; RISCV32-NEXT: or a1, a1, a2 +; RISCV32-NEXT: or a0, a0, a3 ; RISCV32-NEXT: ret %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2); ret i64 %tmp; diff --git a/llvm/test/CodeGen/RISCV/alloca.ll b/llvm/test/CodeGen/RISCV/alloca.ll --- a/llvm/test/CodeGen/RISCV/alloca.ll +++ b/llvm/test/CodeGen/RISCV/alloca.ll @@ -82,8 +82,7 @@ ; RV32I-NEXT: sw a1, 8(sp) ; RV32I-NEXT: addi a1, zero, 10 ; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: addi a1, zero, 9 -; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: addi t0, zero, 9 ; RV32I-NEXT: addi a1, zero, 2 ; RV32I-NEXT: addi a2, zero, 3 ; RV32I-NEXT: addi a3, zero, 4 @@ -91,6 +90,7 @@ ; RV32I-NEXT: addi a5, zero, 6 ; RV32I-NEXT: addi a6, zero, 7 ; RV32I-NEXT: addi a7, zero, 8 +; RV32I-NEXT: sw t0, 0(sp) ; RV32I-NEXT: call func ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: addi sp, s0, -16 diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -123,8 +123,8 @@ ; ; RV32I-LABEL: slli: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a1, 7 ; RV32I-NEXT: srli a2, a0, 25 +; RV32I-NEXT: slli a1, a1, 7 ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: slli a0, a0, 7 ; RV32I-NEXT: ret @@ -140,8 +140,8 @@ ; ; RV32I-LABEL: srli: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: slli a2, a1, 24 +; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: ret @@ -157,8 +157,8 @@ ; ; RV32I-LABEL: srai: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a0, a0, 9 ; RV32I-NEXT: slli a2, a1, 23 +; RV32I-NEXT: srli a0, a0, 9 ; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: srai a1, a1, 9 ; RV32I-NEXT: ret @@ -194,9 +194,9 @@ ; ; RV32I-LABEL: sub: ; RV32I: # %bb.0: +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sltu a3, a0, a2 -; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret %1 = sub i64 %a, %b @@ -218,13 +218,14 @@ ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 -; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: sll a2, a0, a2 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret %1 = shl i64 %a, %b ret i64 %1 @@ -304,13 +305,14 @@ ; RV32I-NEXT: mv a1, zero ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB15_2: +; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: srl a2, a1, a2 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret %1 = lshr i64 %a, %b ret i64 %1 @@ -331,11 +333,11 @@ ; RV32I-NEXT: srai a1, a1, 31 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB16_2: +; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: sra a1, a1, a2 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/arith-with-overflow.ll b/llvm/test/CodeGen/RISCV/arith-with-overflow.ll --- a/llvm/test/CodeGen/RISCV/arith-with-overflow.ll +++ b/llvm/test/CodeGen/RISCV/arith-with-overflow.ll @@ -10,17 +10,17 @@ define i1 @sadd(i32 %a, i32 %b, i32* %c) nounwind { ; RV32I-LABEL: sadd: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: add a3, a0, a1 -; RV32I-NEXT: sw a3, 0(a2) -; RV32I-NEXT: addi a2, zero, -1 -; RV32I-NEXT: slt a1, a2, a1 -; RV32I-NEXT: slt a0, a2, a0 -; RV32I-NEXT: slt a2, a2, a3 -; RV32I-NEXT: xor a2, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: seqz a0, a0 -; RV32I-NEXT: snez a1, a2 -; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: addi a3, zero, -1 +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: slt a5, a3, a0 +; RV32I-NEXT: xor a4, a5, a4 +; RV32I-NEXT: seqz a4, a4 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: slt a0, a3, a1 +; RV32I-NEXT: xor a0, a5, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: ret entry: %x = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) @@ -33,17 +33,17 @@ define i1 @ssub(i32 %a, i32 %b, i32* %c) nounwind { ; RV32I-LABEL: ssub: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: sub a3, a0, a1 -; RV32I-NEXT: sw a3, 0(a2) -; RV32I-NEXT: addi a2, zero, -1 -; RV32I-NEXT: slt a1, a2, a1 -; RV32I-NEXT: slt a0, a2, a0 -; RV32I-NEXT: slt a2, a2, a3 -; RV32I-NEXT: xor a2, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: addi a3, zero, -1 +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: slt a5, a3, a0 +; RV32I-NEXT: xor a4, a5, a4 +; RV32I-NEXT: snez a4, a4 +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: slt a0, a3, a1 +; RV32I-NEXT: xor a0, a5, a0 ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: snez a1, a2 -; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: ret entry: %x = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) @@ -57,8 +57,8 @@ ; RV32I-LABEL: uadd: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: add a1, a0, a1 -; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: ret entry: %x = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) @@ -72,8 +72,8 @@ ; RV32I-LABEL: usub: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sw a1, 0(a2) ; RV32I-NEXT: ret entry: %x = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll @@ -7,7 +7,6 @@ ; higher bits were masked to zero for the comparison. define i1 @cmpxchg_i32_seq_cst_seq_cst(i32* %ptr, i32 signext %cmp, - i32 signext %val) nounwind { ; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA: # %bb.0: # %entry ; RV64IA-NEXT: .LBB0_1: # %entry @@ -22,6 +21,7 @@ ; RV64IA-NEXT: xor a0, a3, a1 ; RV64IA-NEXT: seqz a0, a0 ; RV64IA-NEXT: ret + i32 signext %val) nounwind { entry: %0 = cmpxchg i32* %ptr, i32 %cmp, i32 %val seq_cst seq_cst %1 = extractvalue { i32, i1 } %0, 1 diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -25,14 +25,14 @@ ; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -62,14 +62,14 @@ ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -103,14 +103,14 @@ ; RV32IA-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -140,14 +140,14 @@ ; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -181,14 +181,14 @@ ; RV32IA-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -218,14 +218,14 @@ ; RV64IA-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -259,14 +259,14 @@ ; RV32IA-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -296,14 +296,14 @@ ; RV64IA-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -337,14 +337,14 @@ ; RV32IA-LABEL: cmpxchg_i8_release_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -374,14 +374,14 @@ ; RV64IA-LABEL: cmpxchg_i8_release_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -415,14 +415,14 @@ ; RV32IA-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -452,14 +452,14 @@ ; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -493,14 +493,14 @@ ; RV32IA-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -530,14 +530,14 @@ ; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -571,14 +571,14 @@ ; RV32IA-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -608,14 +608,14 @@ ; RV64IA-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -649,14 +649,14 @@ ; RV32IA-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -686,14 +686,14 @@ ; RV64IA-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -727,14 +727,14 @@ ; RV32IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a3, a3, 24 ; RV32IA-NEXT: addi a4, zero, 255 ; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: andi a2, a2, 255 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a3, (a0) ; RV32IA-NEXT: and a5, a3, a4 @@ -764,14 +764,14 @@ ; RV64IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 -; RV64IA-NEXT: andi a2, a2, 255 -; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 255 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: and a5, a3, a4 @@ -804,26 +804,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB10_3 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB10_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB10_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB10_1 ; RV32IA-NEXT: .LBB10_3: ; RV32IA-NEXT: ret ; @@ -842,26 +842,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB10_3 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB10_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB10_1 ; RV64IA-NEXT: .LBB10_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic @@ -884,26 +884,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB11_3 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB11_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB11_1 ; RV32IA-NEXT: .LBB11_3: ; RV32IA-NEXT: ret ; @@ -922,26 +922,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB11_3 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB11_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB11_1 ; RV64IA-NEXT: .LBB11_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire monotonic @@ -964,26 +964,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB12_3 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB12_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB12_1 ; RV32IA-NEXT: .LBB12_3: ; RV32IA-NEXT: ret ; @@ -1002,26 +1002,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB12_3 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB12_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB12_1 ; RV64IA-NEXT: .LBB12_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire @@ -1044,26 +1044,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB13_3 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB13_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB13_1 ; RV32IA-NEXT: .LBB13_3: ; RV32IA-NEXT: ret ; @@ -1082,26 +1082,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB13_3 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB13_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB13_1 ; RV64IA-NEXT: .LBB13_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic @@ -1124,26 +1124,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_release_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB14_3 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB14_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB14_1 ; RV32IA-NEXT: .LBB14_3: ; RV32IA-NEXT: ret ; @@ -1162,26 +1162,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_release_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB14_3 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB14_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB14_1 ; RV64IA-NEXT: .LBB14_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire @@ -1204,26 +1204,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB15_3 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB15_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB15_1 ; RV32IA-NEXT: .LBB15_3: ; RV32IA-NEXT: ret ; @@ -1242,26 +1242,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB15_3 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB15_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB15_1 ; RV64IA-NEXT: .LBB15_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acq_rel monotonic @@ -1284,26 +1284,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB16_3 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB16_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB16_1 ; RV32IA-NEXT: .LBB16_3: ; RV32IA-NEXT: ret ; @@ -1322,26 +1322,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB16_3 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB16_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB16_1 ; RV64IA-NEXT: .LBB16_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acq_rel acquire @@ -1364,26 +1364,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB17_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB17_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB17_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB17_1 ; RV32IA-NEXT: .LBB17_3: ; RV32IA-NEXT: ret ; @@ -1402,26 +1402,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB17_3 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB17_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB17_1 ; RV64IA-NEXT: .LBB17_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst monotonic @@ -1444,26 +1444,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB18_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB18_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB18_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB18_1 ; RV32IA-NEXT: .LBB18_3: ; RV32IA-NEXT: ret ; @@ -1482,26 +1482,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB18_3 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB18_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB18_1 ; RV64IA-NEXT: .LBB18_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst acquire @@ -1524,26 +1524,26 @@ ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: andi a4, a4, 24 -; RV32IA-NEXT: sll a3, a3, a4 -; RV32IA-NEXT: sll a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: lui a4, 16 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: sll a5, a4, a3 +; RV32IA-NEXT: and a1, a1, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a4 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a3 -; RV32IA-NEXT: bne a5, a1, .LBB19_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB19_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB19_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB19_1 ; RV32IA-NEXT: .LBB19_3: ; RV32IA-NEXT: ret ; @@ -1562,26 +1562,26 @@ ; ; RV64IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: andi a4, a4, 24 -; RV64IA-NEXT: sllw a3, a3, a4 -; RV64IA-NEXT: sllw a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: sllw a5, a4, a3 +; RV64IA-NEXT: and a1, a1, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a4 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a5, a4, a3 -; RV64IA-NEXT: bne a5, a1, .LBB19_3 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 +; RV64IA-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a3 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV64IA-NEXT: bnez a5, .LBB19_1 +; RV64IA-NEXT: xor a4, a3, a2 +; RV64IA-NEXT: and a4, a4, a5 +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w.aqrl a4, a4, (a0) +; RV64IA-NEXT: bnez a4, .LBB19_1 ; RV64IA-NEXT: .LBB19_3: ; RV64IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst seq_cst @@ -2173,12 +2173,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) @@ -2189,12 +2190,13 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) @@ -2233,13 +2235,14 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a6, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -2249,13 +2252,14 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) ; RV32IA-NEXT: addi sp, sp, 16 @@ -2293,12 +2297,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) @@ -2309,12 +2314,13 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) @@ -2353,13 +2359,14 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a6, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 3 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -2369,13 +2376,14 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 3 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) ; RV32IA-NEXT: addi sp, sp, 16 @@ -2413,12 +2421,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 4 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) @@ -2429,12 +2438,13 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 4 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) @@ -2473,13 +2483,14 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a6, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -2489,13 +2500,14 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) ; RV32IA-NEXT: addi sp, sp, 16 @@ -2533,12 +2545,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) @@ -2549,12 +2562,13 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 5 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) @@ -2593,13 +2607,14 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a6, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -2609,13 +2624,14 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) ; RV32IA-NEXT: addi sp, sp, 16 @@ -2653,13 +2669,14 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a6, a4 ; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -2669,13 +2686,14 @@ ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 ; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) ; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw ra, 12(sp) ; RV32IA-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -22,12 +22,12 @@ ; RV32IA-LABEL: atomicrmw_xchg_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: add a5, zero, a1 @@ -53,12 +53,12 @@ ; RV64IA-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: add a5, zero, a1 @@ -88,12 +88,12 @@ ; RV32IA-LABEL: atomicrmw_xchg_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: add a5, zero, a1 @@ -119,12 +119,12 @@ ; RV64IA-LABEL: atomicrmw_xchg_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: add a5, zero, a1 @@ -154,12 +154,12 @@ ; RV32IA-LABEL: atomicrmw_xchg_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: add a5, zero, a1 @@ -185,12 +185,12 @@ ; RV64IA-LABEL: atomicrmw_xchg_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: add a5, zero, a1 @@ -220,12 +220,12 @@ ; RV32IA-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: add a5, zero, a1 @@ -251,12 +251,12 @@ ; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: add a5, zero, a1 @@ -286,12 +286,12 @@ ; RV32IA-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: add a5, zero, a1 @@ -317,12 +317,12 @@ ; RV64IA-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: add a5, zero, a1 @@ -352,12 +352,12 @@ ; RV32IA-LABEL: atomicrmw_add_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: add a5, a4, a1 @@ -383,12 +383,12 @@ ; RV64IA-LABEL: atomicrmw_add_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: add a5, a4, a1 @@ -418,12 +418,12 @@ ; RV32IA-LABEL: atomicrmw_add_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: add a5, a4, a1 @@ -449,12 +449,12 @@ ; RV64IA-LABEL: atomicrmw_add_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: add a5, a4, a1 @@ -484,12 +484,12 @@ ; RV32IA-LABEL: atomicrmw_add_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: add a5, a4, a1 @@ -515,12 +515,12 @@ ; RV64IA-LABEL: atomicrmw_add_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: add a5, a4, a1 @@ -550,12 +550,12 @@ ; RV32IA-LABEL: atomicrmw_add_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: add a5, a4, a1 @@ -581,12 +581,12 @@ ; RV64IA-LABEL: atomicrmw_add_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: add a5, a4, a1 @@ -616,12 +616,12 @@ ; RV32IA-LABEL: atomicrmw_add_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: add a5, a4, a1 @@ -647,12 +647,12 @@ ; RV64IA-LABEL: atomicrmw_add_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: add a5, a4, a1 @@ -682,12 +682,12 @@ ; RV32IA-LABEL: atomicrmw_sub_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: sub a5, a4, a1 @@ -713,12 +713,12 @@ ; RV64IA-LABEL: atomicrmw_sub_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: sub a5, a4, a1 @@ -748,12 +748,12 @@ ; RV32IA-LABEL: atomicrmw_sub_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: sub a5, a4, a1 @@ -779,12 +779,12 @@ ; RV64IA-LABEL: atomicrmw_sub_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: sub a5, a4, a1 @@ -814,12 +814,12 @@ ; RV32IA-LABEL: atomicrmw_sub_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: sub a5, a4, a1 @@ -845,12 +845,12 @@ ; RV64IA-LABEL: atomicrmw_sub_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: sub a5, a4, a1 @@ -880,12 +880,12 @@ ; RV32IA-LABEL: atomicrmw_sub_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: sub a5, a4, a1 @@ -911,12 +911,12 @@ ; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: sub a5, a4, a1 @@ -946,12 +946,12 @@ ; RV32IA-LABEL: atomicrmw_sub_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: sub a5, a4, a1 @@ -977,12 +977,12 @@ ; RV64IA-LABEL: atomicrmw_sub_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: sub a5, a4, a1 @@ -1011,15 +1011,15 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: or a1, a3, a1 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoand.w a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1036,15 +1036,15 @@ ; ; RV64IA-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: or a1, a3, a1 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoand.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1065,15 +1065,15 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: or a1, a3, a1 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoand.w.aq a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1090,15 +1090,15 @@ ; ; RV64IA-LABEL: atomicrmw_and_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: or a1, a3, a1 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoand.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1119,15 +1119,15 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: or a1, a3, a1 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoand.w.rl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1144,15 +1144,15 @@ ; ; RV64IA-LABEL: atomicrmw_and_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: or a1, a3, a1 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoand.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1173,15 +1173,15 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: or a1, a3, a1 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1198,15 +1198,15 @@ ; ; RV64IA-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: or a1, a3, a1 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoand.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1227,15 +1227,15 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: or a1, a3, a1 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1252,15 +1252,15 @@ ; ; RV64IA-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: or a1, a3, a1 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoand.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1282,12 +1282,12 @@ ; RV32IA-LABEL: atomicrmw_nand_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: and a5, a4, a1 @@ -1314,12 +1314,12 @@ ; RV64IA-LABEL: atomicrmw_nand_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: and a5, a4, a1 @@ -1350,12 +1350,12 @@ ; RV32IA-LABEL: atomicrmw_nand_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: and a5, a4, a1 @@ -1382,12 +1382,12 @@ ; RV64IA-LABEL: atomicrmw_nand_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: and a5, a4, a1 @@ -1418,12 +1418,12 @@ ; RV32IA-LABEL: atomicrmw_nand_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a4, (a0) ; RV32IA-NEXT: and a5, a4, a1 @@ -1450,12 +1450,12 @@ ; RV64IA-LABEL: atomicrmw_nand_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a4, (a0) ; RV64IA-NEXT: and a5, a4, a1 @@ -1486,12 +1486,12 @@ ; RV32IA-LABEL: atomicrmw_nand_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a4, (a0) ; RV32IA-NEXT: and a5, a4, a1 @@ -1518,12 +1518,12 @@ ; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a4, (a0) ; RV64IA-NEXT: and a5, a4, a1 @@ -1554,12 +1554,12 @@ ; RV32IA-LABEL: atomicrmw_nand_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: and a5, a4, a1 @@ -1586,12 +1586,12 @@ ; RV64IA-LABEL: atomicrmw_nand_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: and a5, a4, a1 @@ -1621,11 +1621,11 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1642,11 +1642,11 @@ ; ; RV64IA-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1667,11 +1667,11 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1688,11 +1688,11 @@ ; ; RV64IA-LABEL: atomicrmw_or_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1713,11 +1713,11 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1734,11 +1734,11 @@ ; ; RV64IA-LABEL: atomicrmw_or_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1759,11 +1759,11 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1780,11 +1780,11 @@ ; ; RV64IA-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1805,11 +1805,11 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1826,11 +1826,11 @@ ; ; RV64IA-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1851,11 +1851,11 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1872,11 +1872,11 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1897,11 +1897,11 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1918,11 +1918,11 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1943,11 +1943,11 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -1964,11 +1964,11 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -1989,11 +1989,11 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -2010,11 +2010,11 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -2035,11 +2035,11 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -2056,11 +2056,11 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -2079,31 +2079,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB35_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB35_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB35_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB35_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB35_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB35_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2115,29 +2115,29 @@ ; RV32IA-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB35_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB35_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB35_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB35_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2152,31 +2152,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB35_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB35_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB35_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB35_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB35_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2188,29 +2188,29 @@ ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB35_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB35_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB35_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB35_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2229,31 +2229,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB36_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB36_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB36_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB36_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB36_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB36_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2265,29 +2265,29 @@ ; RV32IA-LABEL: atomicrmw_max_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB36_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB36_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB36_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB36_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2302,31 +2302,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB36_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB36_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB36_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB36_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB36_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB36_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2338,29 +2338,29 @@ ; RV64IA-LABEL: atomicrmw_max_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB36_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB36_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB36_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB36_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2379,31 +2379,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB37_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB37_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB37_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB37_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB37_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB37_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2415,29 +2415,29 @@ ; RV32IA-LABEL: atomicrmw_max_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB37_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB37_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB37_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB37_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2452,31 +2452,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB37_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB37_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB37_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB37_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB37_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB37_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2488,29 +2488,29 @@ ; RV64IA-LABEL: atomicrmw_max_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB37_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB37_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB37_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB37_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2529,31 +2529,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB38_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB38_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB38_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB38_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 -; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB38_1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB38_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2565,29 +2565,29 @@ ; RV32IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB38_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB38_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB38_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB38_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2602,31 +2602,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB38_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB38_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB38_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB38_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB38_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB38_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2638,29 +2638,29 @@ ; RV64IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB38_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB38_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB38_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB38_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2679,31 +2679,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB39_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB39_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB39_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB39_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB39_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB39_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2715,29 +2715,29 @@ ; RV32IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB39_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB39_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB39_1 +; RV32IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB39_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2752,31 +2752,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB39_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB39_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB39_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB39_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB39_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB39_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2788,29 +2788,29 @@ ; RV64IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB39_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB39_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB39_1 +; RV64IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB39_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2829,31 +2829,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB40_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB40_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB40_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB40_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB40_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB40_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2865,29 +2865,29 @@ ; RV32IA-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB40_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB40_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB40_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB40_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -2902,31 +2902,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB40_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB40_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB40_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB40_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB40_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB40_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2938,29 +2938,29 @@ ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB40_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB40_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB40_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB40_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -2979,31 +2979,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB41_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB41_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB41_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB41_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB41_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB41_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3015,29 +3015,29 @@ ; RV32IA-LABEL: atomicrmw_min_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB41_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB41_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB41_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB41_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -3052,31 +3052,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB41_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB41_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB41_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB41_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB41_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB41_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3088,29 +3088,29 @@ ; RV64IA-LABEL: atomicrmw_min_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB41_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB41_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB41_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB41_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -3129,31 +3129,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB42_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB42_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB42_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB42_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB42_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB42_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3165,29 +3165,29 @@ ; RV32IA-LABEL: atomicrmw_min_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB42_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB42_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB42_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB42_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -3202,31 +3202,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB42_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB42_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB42_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB42_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB42_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB42_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3238,29 +3238,29 @@ ; RV64IA-LABEL: atomicrmw_min_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB42_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB42_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB42_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB42_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -3279,31 +3279,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB43_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB43_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB43_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB43_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB43_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3315,29 +3315,29 @@ ; RV32IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB43_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB43_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB43_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB43_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -3352,31 +3352,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB43_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB43_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB43_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB43_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB43_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB43_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3388,29 +3388,29 @@ ; RV64IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB43_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB43_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB43_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB43_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -3429,31 +3429,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srai s0, a1, 24 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 24 +; RV32I-NEXT: srai s0, a0, 24 ; RV32I-NEXT: addi s3, sp, 11 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB44_3 +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB44_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB44_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a1, 11(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB44_1 +; RV32I-NEXT: lb a1, 11(sp) +; RV32I-NEXT: beqz a0, .LBB44_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3465,29 +3465,29 @@ ; RV32IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 24 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: addi a4, zero, 255 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: addi a3, zero, 255 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 24 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB44_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB44_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB44_1 +; RV32IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB44_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -3502,31 +3502,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srai s0, a1, 56 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 56 +; RV64I-NEXT: srai s0, a0, 56 ; RV64I-NEXT: addi s3, sp, 7 ; RV64I-NEXT: .LBB44_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB44_3 +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB44_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB44_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a1, 7(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB44_1 +; RV64I-NEXT: lb a1, 7(sp) +; RV64I-NEXT: beqz a0, .LBB44_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3538,29 +3538,29 @@ ; RV64IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 56 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: addi a4, zero, 255 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: addi a3, zero, 255 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 56 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB44_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB44_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB44_1 +; RV64IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB44_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -3577,31 +3577,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB45_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a0, .LBB45_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB45_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB45_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB45_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3613,23 +3613,23 @@ ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a3, a1, .LBB45_3 +; RV32IA-NEXT: bgeu a0, a1, .LBB45_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB45_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -3643,31 +3643,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB45_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB45_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB45_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB45_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB45_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3679,23 +3679,23 @@ ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a3, a1, .LBB45_3 +; RV64IA-NEXT: bgeu a0, a1, .LBB45_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB45_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -3713,31 +3713,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB46_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a0, .LBB46_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB46_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB46_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB46_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3749,23 +3749,23 @@ ; RV32IA-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aq a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a3, a1, .LBB46_3 +; RV32IA-NEXT: bgeu a0, a1, .LBB46_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB46_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -3779,31 +3779,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB46_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB46_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB46_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB46_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB46_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3815,23 +3815,23 @@ ; RV64IA-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aq a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a3, a1, .LBB46_3 +; RV64IA-NEXT: bgeu a0, a1, .LBB46_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB46_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -3849,31 +3849,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB47_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB47_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a0, .LBB47_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB47_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB47_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB47_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3885,23 +3885,23 @@ ; RV32IA-LABEL: atomicrmw_umax_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a3, a1, .LBB47_3 +; RV32IA-NEXT: bgeu a0, a1, .LBB47_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB47_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -3915,31 +3915,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB47_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB47_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB47_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB47_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB47_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB47_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3951,23 +3951,23 @@ ; RV64IA-LABEL: atomicrmw_umax_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a3, a1, .LBB47_3 +; RV64IA-NEXT: bgeu a0, a1, .LBB47_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB47_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -3985,31 +3985,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB48_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB48_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a0, .LBB48_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB48_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB48_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB48_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4021,23 +4021,23 @@ ; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aq a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a3, a1, .LBB48_3 +; RV32IA-NEXT: bgeu a0, a1, .LBB48_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB48_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4051,31 +4051,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB48_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB48_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB48_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB48_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB48_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB48_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4087,23 +4087,23 @@ ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aq a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a3, a1, .LBB48_3 +; RV64IA-NEXT: bgeu a0, a1, .LBB48_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB48_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4121,31 +4121,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB49_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB49_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a0, .LBB49_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB49_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB49_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB49_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4157,23 +4157,23 @@ ; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aqrl a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a3, a1, .LBB49_3 +; RV32IA-NEXT: bgeu a0, a1, .LBB49_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB49_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4187,31 +4187,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB49_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB49_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB49_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB49_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB49_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB49_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4223,23 +4223,23 @@ ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aqrl a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a3, a1, .LBB49_3 +; RV64IA-NEXT: bgeu a0, a1, .LBB49_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB49_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4257,31 +4257,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB50_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB50_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a0, .LBB50_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB50_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB50_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB50_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4293,23 +4293,23 @@ ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a3, .LBB50_3 +; RV32IA-NEXT: bgeu a1, a0, .LBB50_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB50_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4323,31 +4323,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB50_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB50_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB50_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB50_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB50_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB50_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4359,23 +4359,23 @@ ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a3, .LBB50_3 +; RV64IA-NEXT: bgeu a1, a0, .LBB50_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB50_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4393,31 +4393,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB51_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB51_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a0, .LBB51_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB51_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB51_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB51_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4429,23 +4429,23 @@ ; RV32IA-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aq a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a3, .LBB51_3 +; RV32IA-NEXT: bgeu a1, a0, .LBB51_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB51_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4459,31 +4459,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB51_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB51_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB51_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB51_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB51_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB51_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4495,23 +4495,23 @@ ; RV64IA-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aq a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a3, .LBB51_3 +; RV64IA-NEXT: bgeu a1, a0, .LBB51_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB51_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4529,31 +4529,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB52_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB52_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a0, .LBB52_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB52_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB52_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB52_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4565,23 +4565,23 @@ ; RV32IA-LABEL: atomicrmw_umin_i8_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a3, .LBB52_3 +; RV32IA-NEXT: bgeu a1, a0, .LBB52_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB52_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4595,31 +4595,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB52_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB52_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB52_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB52_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB52_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB52_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4631,23 +4631,23 @@ ; RV64IA-LABEL: atomicrmw_umin_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a3, .LBB52_3 +; RV64IA-NEXT: bgeu a1, a0, .LBB52_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB52_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4665,31 +4665,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB53_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB53_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a0, .LBB53_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB53_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB53_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB53_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4701,23 +4701,23 @@ ; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aq a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a3, .LBB53_3 +; RV32IA-NEXT: bgeu a1, a0, .LBB53_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB53_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4731,31 +4731,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB53_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB53_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB53_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB53_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB53_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB53_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4767,23 +4767,23 @@ ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aq a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a3, .LBB53_3 +; RV64IA-NEXT: bgeu a1, a0, .LBB53_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB53_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4801,31 +4801,31 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: andi s0, a1, 255 -; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: addi s2, sp, 11 ; RV32I-NEXT: .LBB54_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB54_3 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a0, .LBB54_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: .LBB54_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV32I-NEXT: sb a0, 11(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sb a3, 11(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB54_1 +; RV32I-NEXT: lb a3, 11(sp) +; RV32I-NEXT: beqz a0, .LBB54_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4837,23 +4837,23 @@ ; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 ; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a6, a3, a2 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a3, a4, a6 +; RV32IA-NEXT: lr.w.aqrl a4, (a6) +; RV32IA-NEXT: and a0, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a3, .LBB54_3 +; RV32IA-NEXT: bgeu a1, a0, .LBB54_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB54_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a2 @@ -4867,31 +4867,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: andi s0, a1, 255 -; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: addi s2, sp, 7 ; RV64I-NEXT: .LBB54_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB54_3 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB54_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB54_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV64I-NEXT: sb a0, 7(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB54_1 +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB54_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4903,23 +4903,23 @@ ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sllw a6, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a3, a4, a6 +; RV64IA-NEXT: lr.w.aqrl a4, (a6) +; RV64IA-NEXT: and a0, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a3, .LBB54_3 +; RV64IA-NEXT: bgeu a1, a0, .LBB54_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB54_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a2 @@ -4941,24 +4941,24 @@ ; ; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) +; RV32IA-NEXT: lr.w a3, (a0) ; RV32IA-NEXT: add a5, zero, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB55_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic: @@ -4973,24 +4973,24 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) +; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: add a5, zero, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB55_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw xchg i16* %a, i16 %b monotonic ret i16 %1 @@ -5009,24 +5009,24 @@ ; ; RV32IA-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: add a5, zero, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB56_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acquire: @@ -5041,24 +5041,24 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) +; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: add a5, zero, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB56_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw xchg i16* %a, i16 %b acquire ret i16 %1 @@ -5077,24 +5077,24 @@ ; ; RV32IA-LABEL: atomicrmw_xchg_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) +; RV32IA-NEXT: lr.w a3, (a0) ; RV32IA-NEXT: add a5, zero, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB57_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_release: @@ -5109,24 +5109,24 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) +; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: add a5, zero, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB57_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw xchg i16* %a, i16 %b release ret i16 %1 @@ -5145,24 +5145,24 @@ ; ; RV32IA-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: lr.w.aq a3, (a0) ; RV32IA-NEXT: add a5, zero, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB58_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel: @@ -5177,24 +5177,24 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) +; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: add a5, zero, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB58_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw xchg i16* %a, i16 %b acq_rel ret i16 %1 @@ -5213,24 +5213,24 @@ ; ; RV32IA-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: lr.w.aqrl a3, (a0) ; RV32IA-NEXT: add a5, zero, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB59_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_seq_cst: @@ -5245,24 +5245,24 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: add a5, zero, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB59_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw xchg i16* %a, i16 %b seq_cst ret i16 %1 @@ -5281,24 +5281,24 @@ ; ; RV32IA-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: add a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB60_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_monotonic: @@ -5313,24 +5313,24 @@ ; ; RV64IA-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: add a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB60_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw add i16* %a, i16 %b monotonic ret i16 %1 @@ -5349,24 +5349,24 @@ ; ; RV32IA-LABEL: atomicrmw_add_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: add a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB61_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acquire: @@ -5381,24 +5381,24 @@ ; ; RV64IA-LABEL: atomicrmw_add_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: add a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB61_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw add i16* %a, i16 %b acquire ret i16 %1 @@ -5417,24 +5417,24 @@ ; ; RV32IA-LABEL: atomicrmw_add_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: add a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB62_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_release: @@ -5449,24 +5449,24 @@ ; ; RV64IA-LABEL: atomicrmw_add_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: add a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB62_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw add i16* %a, i16 %b release ret i16 %1 @@ -5485,24 +5485,24 @@ ; ; RV32IA-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: add a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB63_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acq_rel: @@ -5517,24 +5517,24 @@ ; ; RV64IA-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: add a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB63_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw add i16* %a, i16 %b acq_rel ret i16 %1 @@ -5553,24 +5553,24 @@ ; ; RV32IA-LABEL: atomicrmw_add_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: add a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: add a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB64_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_seq_cst: @@ -5585,24 +5585,24 @@ ; ; RV64IA-LABEL: atomicrmw_add_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: add a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: add a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB64_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw add i16* %a, i16 %b seq_cst ret i16 %1 @@ -5621,24 +5621,24 @@ ; ; RV32IA-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: sub a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB65_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_monotonic: @@ -5653,24 +5653,24 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: sub a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB65_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw sub i16* %a, i16 %b monotonic ret i16 %1 @@ -5689,24 +5689,24 @@ ; ; RV32IA-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: sub a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB66_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acquire: @@ -5721,24 +5721,24 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: sub a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB66_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw sub i16* %a, i16 %b acquire ret i16 %1 @@ -5757,24 +5757,24 @@ ; ; RV32IA-LABEL: atomicrmw_sub_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: sub a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB67_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_release: @@ -5789,24 +5789,24 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: sub a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB67_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw sub i16* %a, i16 %b release ret i16 %1 @@ -5825,24 +5825,24 @@ ; ; RV32IA-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: sub a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB68_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acq_rel: @@ -5857,24 +5857,24 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: sub a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB68_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw sub i16* %a, i16 %b acq_rel ret i16 %1 @@ -5893,24 +5893,24 @@ ; ; RV32IA-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: sub a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: sub a5, a3, a1 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB69_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_seq_cst: @@ -5925,24 +5925,24 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: sub a5, a4, a1 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: sub a5, a3, a1 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB69_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw sub i16* %a, i16 %b seq_cst ret i16 %1 @@ -5961,18 +5961,18 @@ ; ; RV32IA-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: not a2, a2 -; RV32IA-NEXT: or a1, a2, a1 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: or a1, a4, a1 ; RV32IA-NEXT: amoand.w a0, a1, (a0) -; RV32IA-NEXT: srl a0, a0, a3 +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_monotonic: @@ -5987,18 +5987,18 @@ ; ; RV64IA-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: not a2, a2 -; RV64IA-NEXT: or a1, a2, a1 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: or a1, a4, a1 ; RV64IA-NEXT: amoand.w a0, a1, (a0) -; RV64IA-NEXT: srlw a0, a0, a3 +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b monotonic ret i16 %1 @@ -6017,18 +6017,18 @@ ; ; RV32IA-LABEL: atomicrmw_and_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: not a2, a2 -; RV32IA-NEXT: or a1, a2, a1 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: or a1, a4, a1 ; RV32IA-NEXT: amoand.w.aq a0, a1, (a0) -; RV32IA-NEXT: srl a0, a0, a3 +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_acquire: @@ -6043,18 +6043,18 @@ ; ; RV64IA-LABEL: atomicrmw_and_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: not a2, a2 -; RV64IA-NEXT: or a1, a2, a1 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: or a1, a4, a1 ; RV64IA-NEXT: amoand.w.aq a0, a1, (a0) -; RV64IA-NEXT: srlw a0, a0, a3 +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b acquire ret i16 %1 @@ -6073,18 +6073,18 @@ ; ; RV32IA-LABEL: atomicrmw_and_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: not a2, a2 -; RV32IA-NEXT: or a1, a2, a1 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: or a1, a4, a1 ; RV32IA-NEXT: amoand.w.rl a0, a1, (a0) -; RV32IA-NEXT: srl a0, a0, a3 +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_release: @@ -6099,18 +6099,18 @@ ; ; RV64IA-LABEL: atomicrmw_and_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: not a2, a2 -; RV64IA-NEXT: or a1, a2, a1 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: or a1, a4, a1 ; RV64IA-NEXT: amoand.w.rl a0, a1, (a0) -; RV64IA-NEXT: srlw a0, a0, a3 +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b release ret i16 %1 @@ -6129,18 +6129,18 @@ ; ; RV32IA-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: not a2, a2 -; RV32IA-NEXT: or a1, a2, a1 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: or a1, a4, a1 ; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) -; RV32IA-NEXT: srl a0, a0, a3 +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_acq_rel: @@ -6155,18 +6155,18 @@ ; ; RV64IA-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: not a2, a2 -; RV64IA-NEXT: or a1, a2, a1 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: or a1, a4, a1 ; RV64IA-NEXT: amoand.w.aqrl a0, a1, (a0) -; RV64IA-NEXT: srlw a0, a0, a3 +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b acq_rel ret i16 %1 @@ -6185,18 +6185,18 @@ ; ; RV32IA-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: not a2, a2 -; RV32IA-NEXT: or a1, a2, a1 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: or a1, a4, a1 ; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) -; RV32IA-NEXT: srl a0, a0, a3 +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_seq_cst: @@ -6211,18 +6211,18 @@ ; ; RV64IA-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: not a2, a2 -; RV64IA-NEXT: or a1, a2, a1 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: or a1, a4, a1 ; RV64IA-NEXT: amoand.w.aqrl a0, a1, (a0) -; RV64IA-NEXT: srlw a0, a0, a3 +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b seq_cst ret i16 %1 @@ -6241,25 +6241,25 @@ ; ; RV32IA-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB75_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_monotonic: @@ -6274,25 +6274,25 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a5, a4, a1 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a5, a3, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB75_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw nand i16* %a, i16 %b monotonic ret i16 %1 @@ -6311,25 +6311,25 @@ ; ; RV32IA-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB76_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acquire: @@ -6344,25 +6344,25 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a1 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a5, a3, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB76_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw nand i16* %a, i16 %b acquire ret i16 %1 @@ -6381,25 +6381,25 @@ ; ; RV32IA-LABEL: atomicrmw_nand_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB77_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_release: @@ -6414,25 +6414,25 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a5, a4, a1 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a5, a3, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB77_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw nand i16* %a, i16 %b release ret i16 %1 @@ -6451,25 +6451,25 @@ ; ; RV32IA-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB78_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acq_rel: @@ -6484,25 +6484,25 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a5, a4, a1 +; RV64IA-NEXT: lr.w.aq a3, (a0) +; RV64IA-NEXT: and a5, a3, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB78_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw nand i16* %a, i16 %b acq_rel ret i16 %1 @@ -6521,25 +6521,25 @@ ; ; RV32IA-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB79_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_seq_cst: @@ -6554,25 +6554,25 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a2, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a5, a4, a1 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: and a5, a3, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: and a5, a5, a2 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB79_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw nand i16* %a, i16 %b seq_cst ret i16 %1 @@ -6591,13 +6591,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6614,13 +6614,13 @@ ; ; RV64IA-LABEL: atomicrmw_or_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6641,13 +6641,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6664,13 +6664,13 @@ ; ; RV64IA-LABEL: atomicrmw_or_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6691,13 +6691,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6714,13 +6714,13 @@ ; ; RV64IA-LABEL: atomicrmw_or_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6741,13 +6741,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6764,13 +6764,13 @@ ; ; RV64IA-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6791,13 +6791,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6814,13 +6814,13 @@ ; ; RV64IA-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6841,13 +6841,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6864,13 +6864,13 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6891,13 +6891,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6914,13 +6914,13 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6941,13 +6941,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -6964,13 +6964,13 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -6991,13 +6991,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -7014,13 +7014,13 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -7041,13 +7041,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret @@ -7064,13 +7064,13 @@ ; ; RV64IA-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: ret @@ -7089,31 +7089,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB90_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB90_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB90_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB90_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB90_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB90_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7125,30 +7125,30 @@ ; RV32IA-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB90_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB90_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB90_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB90_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7163,31 +7163,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB90_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB90_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB90_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB90_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB90_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB90_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7199,30 +7199,30 @@ ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB90_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB90_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB90_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB90_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -7241,31 +7241,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB91_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB91_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB91_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB91_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB91_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB91_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7277,30 +7277,30 @@ ; RV32IA-LABEL: atomicrmw_max_i16_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB91_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB91_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB91_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB91_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7315,31 +7315,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB91_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB91_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB91_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB91_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB91_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB91_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB91_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB91_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7351,30 +7351,30 @@ ; RV64IA-LABEL: atomicrmw_max_i16_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB91_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB91_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB91_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB91_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -7393,31 +7393,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB92_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB92_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB92_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB92_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB92_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB92_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7429,30 +7429,30 @@ ; RV32IA-LABEL: atomicrmw_max_i16_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB92_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB92_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB92_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB92_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7467,31 +7467,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB92_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB92_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB92_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB92_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB92_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB92_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7503,30 +7503,30 @@ ; RV64IA-LABEL: atomicrmw_max_i16_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB92_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB92_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB92_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB92_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -7545,31 +7545,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB93_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB93_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB93_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB93_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB93_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB93_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7581,30 +7581,30 @@ ; RV32IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB93_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB93_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB93_3: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB93_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB93_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7619,31 +7619,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB93_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB93_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB93_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB93_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB93_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB93_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7655,30 +7655,30 @@ ; RV64IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB93_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB93_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB93_3: # in Loop: Header=BB93_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB93_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB93_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -7697,31 +7697,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB94_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB94_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: blt s0, a0, .LBB94_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB94_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB94_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB94_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7733,30 +7733,30 @@ ; RV32IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a4, a1, .LBB94_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a3, a1, .LBB94_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB94_1 +; RV32IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB94_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7771,31 +7771,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB94_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB94_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: blt s0, a0, .LBB94_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB94_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB94_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB94_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7807,30 +7807,30 @@ ; RV64IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a4, a1, .LBB94_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a3, a1, .LBB94_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB94_1 +; RV64IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB94_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -7849,31 +7849,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB95_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB95_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB95_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB95_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB95_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB95_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7885,30 +7885,30 @@ ; RV32IA-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB95_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB95_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB95_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB95_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -7923,31 +7923,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB95_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB95_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB95_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB95_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB95_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB95_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7959,30 +7959,30 @@ ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB95_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB95_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB95_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB95_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -8001,31 +8001,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB96_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB96_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB96_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB96_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB96_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB96_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8037,30 +8037,30 @@ ; RV32IA-LABEL: atomicrmw_min_i16_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB96_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB96_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: sc.w a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB96_1 +; RV32IA-NEXT: sc.w a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB96_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -8075,31 +8075,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB96_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB96_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB96_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB96_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB96_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB96_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB96_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB96_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8111,30 +8111,30 @@ ; RV64IA-LABEL: atomicrmw_min_i16_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB96_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB96_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: sc.w a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB96_1 +; RV64IA-NEXT: sc.w a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB96_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -8153,31 +8153,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB97_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB97_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB97_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB97_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB97_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB97_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8189,30 +8189,30 @@ ; RV32IA-LABEL: atomicrmw_min_i16_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB97_3 +; RV32IA-NEXT: lr.w a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB97_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB97_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB97_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -8227,31 +8227,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB97_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB97_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB97_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB97_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB97_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB97_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8263,30 +8263,30 @@ ; RV64IA-LABEL: atomicrmw_min_i16_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB97_3 +; RV64IA-NEXT: lr.w a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB97_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB97_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB97_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -8305,31 +8305,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB98_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB98_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB98_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB98_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB98_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB98_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8341,30 +8341,30 @@ ; RV32IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB98_3 +; RV32IA-NEXT: lr.w.aq a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB98_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB98_1 +; RV32IA-NEXT: sc.w.rl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB98_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -8379,31 +8379,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB98_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB98_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB98_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB98_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB98_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB98_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8415,30 +8415,30 @@ ; RV64IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB98_3 +; RV64IA-NEXT: lr.w.aq a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB98_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB98_1 +; RV64IA-NEXT: sc.w.rl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB98_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -8457,31 +8457,31 @@ ; RV32I-NEXT: sw s3, 12(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: srai s0, a1, 16 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s2, 16 +; RV32I-NEXT: srai s0, a0, 16 ; RV32I-NEXT: addi s3, sp, 10 ; RV32I-NEXT: .LBB99_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB99_3 +; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bge s0, a0, .LBB99_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB99_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_1 Depth=1 -; RV32I-NEXT: sh a0, 10(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB99_1 +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: beqz a0, .LBB99_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8493,30 +8493,30 @@ ; RV32IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 16 -; RV32IA-NEXT: sub a6, a3, a2 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a2 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a7, a3, a2 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a2 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: addi a4, zero, 16 +; RV32IA-NEXT: sub a4, a4, a2 ; RV32IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a0) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: sll a4, a4, a6 -; RV32IA-NEXT: sra a4, a4, a6 -; RV32IA-NEXT: bge a1, a4, .LBB99_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a6) +; RV32IA-NEXT: and a3, a5, a7 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sra a3, a3, a4 +; RV32IA-NEXT: bge a1, a3, .LBB99_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: xor a3, a5, a1 -; RV32IA-NEXT: and a3, a3, a7 -; RV32IA-NEXT: xor a3, a5, a3 +; RV32IA-NEXT: xor a0, a5, a1 +; RV32IA-NEXT: and a0, a0, a7 +; RV32IA-NEXT: xor a0, a5, a0 ; RV32IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV32IA-NEXT: bnez a3, .LBB99_1 +; RV32IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV32IA-NEXT: bnez a0, .LBB99_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a2 ; RV32IA-NEXT: ret @@ -8531,31 +8531,31 @@ ; RV64I-NEXT: sd s3, 8(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: srai s0, a1, 48 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s2, 48 +; RV64I-NEXT: srai s0, a0, 48 ; RV64I-NEXT: addi s3, sp, 6 ; RV64I-NEXT: .LBB99_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB99_3 +; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bge s0, a0, .LBB99_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB99_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_1 Depth=1 -; RV64I-NEXT: sh a0, 6(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB99_1 +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: beqz a0, .LBB99_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8567,30 +8567,30 @@ ; RV64IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 ; RV64IA-NEXT: andi a2, a2, 24 -; RV64IA-NEXT: addi a3, zero, 48 -; RV64IA-NEXT: sub a6, a3, a2 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a2 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a7, a3, a2 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a2 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: addi a4, zero, 48 +; RV64IA-NEXT: sub a4, a4, a2 ; RV64IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a0) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a3, a5 -; RV64IA-NEXT: sll a4, a4, a6 -; RV64IA-NEXT: sra a4, a4, a6 -; RV64IA-NEXT: bge a1, a4, .LBB99_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a6) +; RV64IA-NEXT: and a3, a5, a7 +; RV64IA-NEXT: mv a0, a5 +; RV64IA-NEXT: sll a3, a3, a4 +; RV64IA-NEXT: sra a3, a3, a4 +; RV64IA-NEXT: bge a1, a3, .LBB99_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: xor a3, a5, a1 -; RV64IA-NEXT: and a3, a3, a7 -; RV64IA-NEXT: xor a3, a5, a3 +; RV64IA-NEXT: xor a0, a5, a1 +; RV64IA-NEXT: and a0, a0, a7 +; RV64IA-NEXT: xor a0, a5, a0 ; RV64IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a3, a3, (a0) -; RV64IA-NEXT: bnez a3, .LBB99_1 +; RV64IA-NEXT: sc.w.aqrl a0, a0, (a6) +; RV64IA-NEXT: bnez a0, .LBB99_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a2 ; RV64IA-NEXT: ret @@ -8610,31 +8610,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB100_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB100_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltu s1, a0, .LBB100_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB100_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB100_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB100_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8646,28 +8646,28 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV32IA-NEXT: lr.w a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a0, a1, .LBB100_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB100_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_monotonic: @@ -8681,31 +8681,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB100_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB100_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s1, a0, .LBB100_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB100_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB100_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB100_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -8717,28 +8717,28 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV64IA-NEXT: lr.w a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a0, a1, .LBB100_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB100_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umax i16* %a, i16 %b monotonic ret i16 %1 @@ -8756,31 +8756,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB101_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB101_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltu s1, a0, .LBB101_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB101_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB101_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB101_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8792,28 +8792,28 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV32IA-NEXT: lr.w.aq a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a0, a1, .LBB101_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB101_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_acquire: @@ -8827,31 +8827,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB101_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB101_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s1, a0, .LBB101_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB101_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB101_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB101_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -8863,28 +8863,28 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV64IA-NEXT: lr.w.aq a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a0, a1, .LBB101_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB101_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umax i16* %a, i16 %b acquire ret i16 %1 @@ -8902,31 +8902,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB102_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB102_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltu s1, a0, .LBB102_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB102_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) +; RV32I-NEXT: sh a1, 6(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB102_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB102_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8938,28 +8938,28 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV32IA-NEXT: lr.w a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a0, a1, .LBB102_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB102_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_release: @@ -8973,31 +8973,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB102_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB102_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s1, a0, .LBB102_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB102_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB102_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB102_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9009,28 +9009,28 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV64IA-NEXT: lr.w a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a0, a1, .LBB102_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB102_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umax i16* %a, i16 %b release ret i16 %1 @@ -9048,31 +9048,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB103_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB103_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltu s1, a0, .LBB103_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB103_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB103_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB103_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9084,28 +9084,28 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV32IA-NEXT: lr.w.aq a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a0, a1, .LBB103_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB103_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_acq_rel: @@ -9119,31 +9119,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB103_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB103_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s1, a0, .LBB103_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB103_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB103_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB103_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9155,28 +9155,28 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV64IA-NEXT: lr.w.aq a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a0, a1, .LBB103_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB103_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umax i16* %a, i16 %b acq_rel ret i16 %1 @@ -9194,31 +9194,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB104_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB104_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltu s1, a0, .LBB104_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB104_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB104_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB104_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9230,28 +9230,28 @@ ; ; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB104_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a0, a1, .LBB104_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB104_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_seq_cst: @@ -9265,31 +9265,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB104_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB104_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bltu s1, a0, .LBB104_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB104_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB104_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB104_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9301,28 +9301,28 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB104_3 +; RV64IA-NEXT: lr.w.aqrl a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a0, a1, .LBB104_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB104_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umax i16* %a, i16 %b seq_cst ret i16 %1 @@ -9340,31 +9340,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB105_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB105_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgeu s1, a0, .LBB105_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB105_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB105_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB105_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9376,28 +9376,28 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV32IA-NEXT: lr.w a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a1, a0, .LBB105_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB105_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB105_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_monotonic: @@ -9411,31 +9411,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB105_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB105_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bgeu s1, a0, .LBB105_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB105_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB105_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB105_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9447,28 +9447,28 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 -; RV64IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-NEXT: lr.w a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a1, a0, .LBB105_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB105_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB105_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umin i16* %a, i16 %b monotonic ret i16 %1 @@ -9486,31 +9486,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB106_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB106_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgeu s1, a0, .LBB106_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB106_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB106_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB106_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9522,28 +9522,28 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV32IA-NEXT: lr.w.aq a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a1, a0, .LBB106_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: sc.w a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB106_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acquire: @@ -9557,31 +9557,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB106_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB106_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bgeu s1, a0, .LBB106_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB106_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB106_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB106_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9593,28 +9593,28 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV64IA-NEXT: lr.w.aq a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a1, a0, .LBB106_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a0) +; RV64IA-NEXT: sc.w a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB106_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umin i16* %a, i16 %b acquire ret i16 %1 @@ -9632,31 +9632,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB107_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB107_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgeu s1, a0, .LBB107_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB107_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) +; RV32I-NEXT: sh a1, 6(sp) +; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB107_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB107_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9668,28 +9668,28 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV32IA-NEXT: lr.w a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a1, a0, .LBB107_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB107_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_release: @@ -9703,31 +9703,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB107_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB107_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bgeu s1, a0, .LBB107_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB107_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) +; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB107_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB107_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9739,28 +9739,28 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV64IA-NEXT: lr.w a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a1, a0, .LBB107_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB107_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umin i16* %a, i16 %b release ret i16 %1 @@ -9778,31 +9778,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB108_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB108_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgeu s1, a0, .LBB108_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB108_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB108_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB108_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9814,28 +9814,28 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV32IA-NEXT: lr.w.aq a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a1, a0, .LBB108_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: sc.w.rl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB108_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acq_rel: @@ -9849,31 +9849,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB108_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB108_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bgeu s1, a0, .LBB108_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB108_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB108_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB108_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9885,28 +9885,28 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV64IA-NEXT: lr.w.aq a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a1, a0, .LBB108_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a0) +; RV64IA-NEXT: sc.w.rl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB108_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umin i16* %a, i16 %b acq_rel ret i16 %1 @@ -9924,31 +9924,31 @@ ; RV32I-NEXT: sw s4, 8(sp) ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: lhu a0, 0(a0) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s0, a0, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 ; RV32I-NEXT: .LBB109_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB109_3 +; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bgeu s1, a0, .LBB109_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_1 Depth=1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB109_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_1 Depth=1 -; RV32I-NEXT: sh a0, 6(sp) -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sh a1, 6(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB109_1 +; RV32I-NEXT: lh a1, 6(sp) +; RV32I-NEXT: beqz a0, .LBB109_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9960,28 +9960,28 @@ ; ; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lui a2, 16 -; RV32IA-NEXT: addi a2, a2, -1 -; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a6, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 -; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: sll a4, a3, a2 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a2, a4, a6 -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a6) +; RV32IA-NEXT: and a0, a3, a4 +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: bgeu a1, a0, .LBB109_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: and a5, a5, a6 -; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: xor a5, a3, a1 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV32IA-NEXT: bnez a5, .LBB109_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: srl a0, a3, a2 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_seq_cst: @@ -9995,31 +9995,31 @@ ; RV64I-NEXT: sd s4, 16(sp) ; RV64I-NEXT: mv s2, a1 ; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: lhu a0, 0(a0) -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw s0, a0, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 ; RV64I-NEXT: .LBB109_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB109_3 +; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bgeu s1, a0, .LBB109_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_1 Depth=1 ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: .LBB109_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_1 Depth=1 -; RV64I-NEXT: sh a0, 14(sp) -; RV64I-NEXT: mv a0, s4 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a1, s3 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB109_1 +; RV64I-NEXT: lh a1, 14(sp) +; RV64I-NEXT: beqz a0, .LBB109_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -10031,28 +10031,28 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 -; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: slli a3, a0, 3 -; RV64IA-NEXT: andi a3, a3, 24 -; RV64IA-NEXT: sllw a6, a2, a3 -; RV64IA-NEXT: sllw a1, a1, a3 -; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: sllw a4, a3, a2 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a0) -; RV64IA-NEXT: and a2, a4, a6 -; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV64IA-NEXT: lr.w.aqrl a3, (a6) +; RV64IA-NEXT: and a0, a3, a4 +; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: bgeu a1, a0, .LBB109_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 -; RV64IA-NEXT: xor a5, a4, a1 -; RV64IA-NEXT: and a5, a5, a6 -; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: xor a5, a3, a1 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) ; RV64IA-NEXT: bnez a5, .LBB109_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a4, a3 +; RV64IA-NEXT: srlw a0, a3, a2 ; RV64IA-NEXT: ret %1 = atomicrmw umin i16* %a, i16 %b seq_cst ret i16 %1 @@ -11336,28 +11336,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB145_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB145_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: blt s1, a3, .LBB145_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB145_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB145_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11378,31 +11379,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB145_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB145_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a0, .LBB145_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB145_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB145_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB145_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11427,28 +11428,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB146_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB146_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: blt s1, a3, .LBB146_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB146_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB146_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11469,31 +11471,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB146_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB146_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a0, .LBB146_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB146_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB146_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB146_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11518,28 +11520,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB147_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB147_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: blt s1, a3, .LBB147_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB147_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB147_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11560,31 +11563,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB147_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB147_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a0, .LBB147_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB147_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB147_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB147_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11609,28 +11612,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB148_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB148_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: blt s1, a3, .LBB148_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB148_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB148_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11651,31 +11655,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB148_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB148_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a0, .LBB148_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB148_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB148_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB148_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11700,28 +11704,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB149_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB149_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: blt s1, a3, .LBB149_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB149_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB149_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11742,31 +11747,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB149_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB149_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a0, .LBB149_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB149_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB149_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB149_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11791,28 +11796,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB150_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB150_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s1, a3, .LBB150_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB150_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB150_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11833,31 +11839,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB150_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB150_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a0, .LBB150_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB150_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB150_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB150_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11882,28 +11888,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB151_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB151_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s1, a3, .LBB151_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB151_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB151_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -11924,31 +11931,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB151_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB151_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a0, .LBB151_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB151_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB151_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB151_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11973,28 +11980,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB152_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB152_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s1, a3, .LBB152_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB152_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB152_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12015,31 +12023,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB152_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB152_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a0, .LBB152_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB152_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB152_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB152_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12064,28 +12072,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB153_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB153_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s1, a3, .LBB153_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB153_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB153_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12106,31 +12115,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB153_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB153_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a0, .LBB153_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB153_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB153_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB153_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12155,28 +12164,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB154_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB154_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bge s1, a3, .LBB154_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB154_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB154_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12197,31 +12207,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB154_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB154_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a0, .LBB154_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB154_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB154_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB154_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12246,28 +12256,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB155_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB155_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a3, .LBB155_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB155_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB155_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12288,31 +12299,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB155_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB155_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB155_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB155_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB155_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB155_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12337,28 +12348,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB156_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB156_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a3, .LBB156_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB156_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB156_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12379,31 +12391,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB156_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB156_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB156_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB156_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB156_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB156_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12428,28 +12440,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB157_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB157_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a3, .LBB157_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB157_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB157_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12470,31 +12483,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB157_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB157_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB157_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB157_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB157_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB157_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12519,28 +12532,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB158_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB158_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a3, .LBB158_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB158_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB158_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12561,31 +12575,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB158_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB158_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB158_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB158_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB158_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB158_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12610,28 +12624,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB159_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB159_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bltu s1, a3, .LBB159_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB159_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB159_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12652,31 +12667,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB159_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB159_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a0, .LBB159_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB159_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB159_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB159_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12701,28 +12716,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB160_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB160_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a3, .LBB160_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB160_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB160_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12743,31 +12759,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB160_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB160_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB160_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB160_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB160_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB160_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12792,28 +12808,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB161_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB161_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a3, .LBB161_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB161_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB161_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12834,31 +12851,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB161_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB161_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB161_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB161_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB161_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB161_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12883,28 +12900,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB162_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB162_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a3, .LBB162_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB162_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB162_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -12925,31 +12943,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB162_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB162_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB162_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB162_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB162_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB162_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12974,28 +12992,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB163_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB163_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a3, .LBB163_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB163_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB163_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -13016,31 +13035,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB163_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB163_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB163_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB163_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB163_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB163_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -13065,28 +13084,29 @@ ; RV32I-NEXT: sw s0, 24(sp) ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: addi s2, sp, 12 ; RV32I-NEXT: .LBB164_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB164_3 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bgeu s1, a3, .LBB164_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: .LBB164_3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: sw a3, 12(sp) ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a2, 12(sp) +; RV32I-NEXT: lw a3, 12(sp) ; RV32I-NEXT: beqz a0, .LBB164_1 ; RV32I-NEXT: # %bb.4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) ; RV32I-NEXT: lw s0, 24(sp) @@ -13107,31 +13127,31 @@ ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) ; RV64I-NEXT: sd s3, 8(sp) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: sext.w s0, a1 -; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lwu a3, 0(a0) +; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: addi s2, sp, 4 ; RV64I-NEXT: .LBB164_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB164_3 +; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a0, .LBB164_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s3 ; RV64I-NEXT: .LBB164_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV64I-NEXT: sw a0, 4(sp) -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s3 +; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB164_1 +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB164_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -14562,43 +14582,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB200_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB200_3 +; RV32I-NEXT: beq a5, s1, .LBB200_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB200_4 ; RV32I-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB200_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB200_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB200_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB200_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -14615,43 +14637,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB200_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB200_3 +; RV32IA-NEXT: beq a5, s1, .LBB200_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB200_4 ; RV32IA-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB200_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB200_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB200_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) +; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB200_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -14667,28 +14691,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB200_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB200_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a3, .LBB200_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB200_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: sd a3, 8(sp) +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB200_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -14713,43 +14738,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB201_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB201_3 +; RV32I-NEXT: beq a5, s1, .LBB201_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB201_4 ; RV32I-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB201_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB201_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB201_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB201_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -14766,43 +14793,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB201_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB201_3 +; RV32IA-NEXT: beq a5, s1, .LBB201_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB201_4 ; RV32IA-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB201_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB201_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB201_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB201_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -14818,28 +14847,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB201_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB201_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a3, .LBB201_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB201_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB201_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -14864,43 +14894,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB202_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB202_3 +; RV32I-NEXT: beq a5, s1, .LBB202_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB202_4 ; RV32I-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB202_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB202_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB202_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB202_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -14917,43 +14949,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB202_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB202_3 +; RV32IA-NEXT: beq a5, s1, .LBB202_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB202_4 ; RV32IA-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB202_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB202_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB202_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB202_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -14969,28 +15003,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB202_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB202_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a3, .LBB202_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB202_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB202_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15015,43 +15050,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB203_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB203_3 +; RV32I-NEXT: beq a5, s1, .LBB203_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB203_4 ; RV32I-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB203_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB203_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB203_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB203_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15068,43 +15105,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB203_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB203_3 +; RV32IA-NEXT: beq a5, s1, .LBB203_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB203_4 ; RV32IA-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB203_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB203_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB203_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB203_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15120,28 +15159,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB203_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB203_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a3, .LBB203_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB203_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB203_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15166,43 +15206,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB204_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB204_3 +; RV32I-NEXT: beq a5, s1, .LBB204_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB204_4 ; RV32I-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB204_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB204_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB204_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB204_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15219,43 +15261,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB204_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB204_3 +; RV32IA-NEXT: beq a5, s1, .LBB204_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB204_4 ; RV32IA-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB204_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB204_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB204_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB204_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15271,28 +15315,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB204_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB204_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: blt s1, a3, .LBB204_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB204_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB204_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15317,44 +15362,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB205_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB205_3 +; RV32I-NEXT: beq a5, s1, .LBB205_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB205_4 ; RV32I-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB205_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB205_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB205_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB205_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15371,44 +15418,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB205_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB205_3 +; RV32IA-NEXT: beq a5, s1, .LBB205_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB205_4 ; RV32IA-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB205_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB205_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB205_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) +; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB205_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15424,28 +15473,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB205_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB205_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a3, .LBB205_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB205_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: sd a3, 8(sp) +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB205_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15470,44 +15520,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB206_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB206_3 +; RV32I-NEXT: beq a5, s1, .LBB206_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB206_4 ; RV32I-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB206_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB206_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB206_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB206_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15524,44 +15576,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB206_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB206_3 +; RV32IA-NEXT: beq a5, s1, .LBB206_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB206_4 ; RV32IA-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB206_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB206_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB206_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB206_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15577,28 +15631,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB206_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB206_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a3, .LBB206_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB206_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB206_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15623,44 +15678,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB207_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB207_3 +; RV32I-NEXT: beq a5, s1, .LBB207_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB207_4 ; RV32I-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB207_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB207_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB207_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB207_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15677,44 +15734,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB207_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB207_3 +; RV32IA-NEXT: beq a5, s1, .LBB207_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB207_4 ; RV32IA-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB207_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB207_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB207_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB207_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15730,28 +15789,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB207_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB207_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a3, .LBB207_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB207_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB207_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15776,44 +15836,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB208_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB208_3 +; RV32I-NEXT: beq a5, s1, .LBB208_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB208_4 ; RV32I-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB208_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB208_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB208_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB208_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15830,44 +15892,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB208_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB208_3 +; RV32IA-NEXT: beq a5, s1, .LBB208_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB208_4 ; RV32IA-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB208_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB208_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB208_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB208_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -15883,28 +15947,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB208_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB208_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a3, .LBB208_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB208_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB208_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -15929,44 +15994,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB209_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB209_3 +; RV32I-NEXT: beq a5, s1, .LBB209_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: slt a0, s1, a5 ; RV32I-NEXT: j .LBB209_4 ; RV32I-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB209_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB209_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB209_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB209_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -15983,44 +16050,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB209_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB209_3 +; RV32IA-NEXT: beq a5, s1, .LBB209_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: slt a0, s1, a5 ; RV32IA-NEXT: j .LBB209_4 ; RV32IA-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB209_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB209_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB209_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB209_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16036,28 +16105,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB209_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB209_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bge s1, a3, .LBB209_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB209_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB209_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16082,43 +16152,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB210_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB210_3 +; RV32I-NEXT: beq a5, s1, .LBB210_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB210_4 ; RV32I-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB210_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB210_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB210_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB210_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16135,43 +16207,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB210_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB210_3 +; RV32IA-NEXT: beq a5, s1, .LBB210_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB210_4 ; RV32IA-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB210_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB210_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB210_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) +; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB210_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16187,28 +16261,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB210_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB210_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a3, .LBB210_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB210_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: sd a3, 8(sp) +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB210_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16233,43 +16308,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB211_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB211_3 +; RV32I-NEXT: beq a5, s1, .LBB211_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB211_4 ; RV32I-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB211_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB211_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB211_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB211_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16286,43 +16363,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB211_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB211_3 +; RV32IA-NEXT: beq a5, s1, .LBB211_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB211_4 ; RV32IA-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB211_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB211_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB211_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB211_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16338,28 +16417,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB211_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB211_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a3, .LBB211_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB211_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB211_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16384,43 +16464,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB212_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB212_3 +; RV32I-NEXT: beq a5, s1, .LBB212_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB212_4 ; RV32I-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB212_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB212_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB212_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB212_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16437,43 +16519,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB212_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB212_3 +; RV32IA-NEXT: beq a5, s1, .LBB212_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB212_4 ; RV32IA-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB212_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB212_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB212_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB212_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16489,28 +16573,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB212_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB212_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a3, .LBB212_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB212_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB212_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16535,43 +16620,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB213_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB213_3 +; RV32I-NEXT: beq a5, s1, .LBB213_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB213_4 ; RV32I-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB213_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB213_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB213_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB213_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16588,43 +16675,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB213_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB213_3 +; RV32IA-NEXT: beq a5, s1, .LBB213_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB213_4 ; RV32IA-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB213_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB213_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB213_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB213_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16640,28 +16729,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB213_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB213_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a3, .LBB213_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB213_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB213_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16686,43 +16776,45 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB214_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB214_3 +; RV32I-NEXT: beq a5, s1, .LBB214_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB214_4 ; RV32I-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB214_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB214_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB214_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB214_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16739,43 +16831,45 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB214_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB214_3 +; RV32IA-NEXT: beq a5, s1, .LBB214_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB214_4 ; RV32IA-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB214_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB214_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB214_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB214_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16791,28 +16885,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB214_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB214_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bltu s1, a3, .LBB214_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB214_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB214_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16837,44 +16932,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB215_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB215_3 +; RV32I-NEXT: beq a5, s1, .LBB215_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB215_4 ; RV32I-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB215_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB215_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB215_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB215_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -16891,44 +16988,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB215_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB215_3 +; RV32IA-NEXT: beq a5, s1, .LBB215_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB215_4 ; RV32IA-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB215_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB215_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB215_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) +; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB215_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -16944,28 +17043,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB215_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB215_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a3, .LBB215_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB215_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: sd a3, 8(sp) +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB215_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -16990,44 +17090,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB216_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB216_3 +; RV32I-NEXT: beq a5, s1, .LBB216_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB216_4 ; RV32I-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB216_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB216_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB216_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB216_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -17044,44 +17146,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB216_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB216_3 +; RV32IA-NEXT: beq a5, s1, .LBB216_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB216_4 ; RV32IA-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB216_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB216_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB216_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB216_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -17097,28 +17201,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB216_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB216_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a3, .LBB216_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB216_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB216_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -17143,44 +17248,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB217_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB217_3 +; RV32I-NEXT: beq a5, s1, .LBB217_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB217_4 ; RV32I-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB217_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB217_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB217_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB217_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -17197,44 +17304,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB217_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB217_3 +; RV32IA-NEXT: beq a5, s1, .LBB217_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB217_4 ; RV32IA-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB217_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB217_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB217_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB217_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -17250,28 +17359,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB217_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB217_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a3, .LBB217_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB217_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 3 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB217_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -17296,44 +17406,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB218_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB218_3 +; RV32I-NEXT: beq a5, s1, .LBB218_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB218_4 ; RV32I-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB218_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB218_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB218_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB218_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -17350,44 +17462,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB218_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB218_3 +; RV32IA-NEXT: beq a5, s1, .LBB218_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB218_4 ; RV32IA-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB218_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB218_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB218_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB218_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -17403,28 +17517,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB218_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB218_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a3, .LBB218_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB218_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB218_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) @@ -17449,44 +17564,46 @@ ; RV32I-NEXT: sw s1, 20(sp) ; RV32I-NEXT: sw s2, 16(sp) ; RV32I-NEXT: sw s3, 12(sp) -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp ; RV32I-NEXT: .LBB219_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB219_3 +; RV32I-NEXT: beq a5, s1, .LBB219_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: sltu a0, s1, a5 ; RV32I-NEXT: j .LBB219_4 ; RV32I-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a0, s2, a4 ; RV32I-NEXT: .LBB219_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: bnez a0, .LBB219_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB219_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: sw a4, 0(sp) +; RV32I-NEXT: sw a5, 4(sp) ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) +; RV32I-NEXT: lw a5, 4(sp) +; RV32I-NEXT: lw a4, 0(sp) ; RV32I-NEXT: beqz a0, .LBB219_1 ; RV32I-NEXT: # %bb.7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -17503,44 +17620,46 @@ ; RV32IA-NEXT: sw s1, 20(sp) ; RV32IA-NEXT: sw s2, 16(sp) ; RV32IA-NEXT: sw s3, 12(sp) -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp ; RV32IA-NEXT: .LBB219_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB219_3 +; RV32IA-NEXT: beq a5, s1, .LBB219_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: sltu a0, s1, a5 ; RV32IA-NEXT: j .LBB219_4 ; RV32IA-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a0, s2, a4 ; RV32IA-NEXT: .LBB219_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a4 +; RV32IA-NEXT: mv a3, a5 ; RV32IA-NEXT: bnez a0, .LBB219_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: .LBB219_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sw a1, 4(sp) -; RV32IA-NEXT: mv a0, s1 -; RV32IA-NEXT: mv a1, s3 +; RV32IA-NEXT: sw a4, 0(sp) +; RV32IA-NEXT: sw a5, 4(sp) ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: mv a1, s3 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) +; RV32IA-NEXT: lw a5, 4(sp) +; RV32IA-NEXT: lw a4, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB219_1 ; RV32IA-NEXT: # %bb.7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) ; RV32IA-NEXT: lw s1, 20(sp) @@ -17556,28 +17675,29 @@ ; RV64I-NEXT: sd s0, 32(sp) ; RV64I-NEXT: sd s1, 24(sp) ; RV64I-NEXT: sd s2, 16(sp) -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: addi s2, sp, 8 ; RV64I-NEXT: .LBB219_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB219_3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bgeu s1, a3, .LBB219_3 ; RV64I-NEXT: # %bb.2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: .LBB219_3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: sd a3, 8(sp) ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a2, 8(sp) +; RV64I-NEXT: ld a3, 8(sp) ; RV64I-NEXT: beqz a0, .LBB219_1 ; RV64I-NEXT: # %bb.4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) ; RV64I-NEXT: ld s0, 32(sp) diff --git a/llvm/test/CodeGen/RISCV/bare-select.ll b/llvm/test/CodeGen/RISCV/bare-select.ll --- a/llvm/test/CodeGen/RISCV/bare-select.ll +++ b/llvm/test/CodeGen/RISCV/bare-select.ll @@ -5,12 +5,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-LABEL: bare_select: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: bnez a0, .LBB0_2 +; RV32I-NEXT: andi a3, a0, 1 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a3, .LBB0_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = select i1 %a, i32 %b, i32 %c ret i32 %1 @@ -19,12 +19,12 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind { ; RV32I-LABEL: bare_select_float: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: bnez a0, .LBB1_2 +; RV32I-NEXT: andi a3, a0, 1 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a3, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = select i1 %a, float %b, float %c ret float %1 diff --git a/llvm/test/CodeGen/RISCV/blockaddress.ll b/llvm/test/CodeGen/RISCV/blockaddress.ll --- a/llvm/test/CodeGen/RISCV/blockaddress.ll +++ b/llvm/test/CodeGen/RISCV/blockaddress.ll @@ -9,11 +9,11 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) -; RV32I-NEXT: lui a0, %hi(.Ltmp0) -; RV32I-NEXT: addi a0, a0, %lo(.Ltmp0) -; RV32I-NEXT: lui a1, %hi(addr) -; RV32I-NEXT: sw a0, %lo(addr)(a1) -; RV32I-NEXT: lw a0, %lo(addr)(a1) +; RV32I-NEXT: lui a0, %hi(addr) +; RV32I-NEXT: lui a1, %hi(.Ltmp0) +; RV32I-NEXT: addi a1, a1, %lo(.Ltmp0) +; RV32I-NEXT: sw a1, %lo(addr)(a0) +; RV32I-NEXT: lw a0, %lo(addr)(a0) ; RV32I-NEXT: jr a0 ; RV32I-NEXT: .Ltmp0: # Block address taken ; RV32I-NEXT: .LBB0_1: # %block diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -29,10 +29,10 @@ define i32 @test_bswap_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bswap_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi a1, a1, -256 -; RV32I-NEXT: srli a2, a0, 8 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: srli a2, a0, 24 ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: slli a2, a0, 8 @@ -49,9 +49,9 @@ define i64 @test_bswap_i64(i64 %a) nounwind { ; RV32I-LABEL: test_bswap_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a3, a2, -256 ; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -256 ; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: or a2, a2, a4 @@ -87,10 +87,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -131,10 +131,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -172,10 +172,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -220,11 +220,11 @@ ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -269,9 +269,9 @@ ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: not a1, s4 ; RV32I-NEXT: and a0, a1, a0 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi s5, a1, 1365 ; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 ; RV32I-NEXT: and a1, a1, s5 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 @@ -282,12 +282,12 @@ ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s6, a1, 257 ; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s1, a1, -241 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: mv a1, s6 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, s3, -1 @@ -302,8 +302,8 @@ ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: mv a1, s6 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: bnez s4, .LBB7_2 ; RV32I-NEXT: # %bb.1: @@ -336,10 +336,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -371,10 +371,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -406,10 +406,10 @@ ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 @@ -450,9 +450,9 @@ ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: not a1, s4 ; RV32I-NEXT: and a0, a1, a0 -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi s5, a1, 1365 ; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 ; RV32I-NEXT: and a1, a1, s5 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 @@ -463,12 +463,12 @@ ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s6, a1, 257 ; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s1, a1, -241 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: mv a1, s6 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, s3, -1 @@ -483,8 +483,8 @@ ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: mv a1, s6 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: bnez s4, .LBB11_2 ; RV32I-NEXT: # %bb.1: @@ -514,10 +514,10 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) -; RV32I-NEXT: lui a1, 349525 -; RV32I-NEXT: addi a1, a1, 1365 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -23,8 +23,8 @@ ; ILP32-LP64-LABEL: callee: ; ILP32-LP64: # %bb.0: ; ILP32-LP64-NEXT: lui a0, %hi(var) -; ILP32-LP64-NEXT: addi a1, a0, %lo(var) ; ILP32-LP64-NEXT: flw ft0, %lo(var)(a0) +; ILP32-LP64-NEXT: addi a1, a0, %lo(var) ; ILP32-LP64-NEXT: flw ft1, 4(a1) ; ILP32-LP64-NEXT: flw ft2, 8(a1) ; ILP32-LP64-NEXT: flw ft3, 12(a1) @@ -52,14 +52,14 @@ ; ILP32-LP64-NEXT: flw fs5, 100(a1) ; ILP32-LP64-NEXT: flw fs6, 104(a1) ; ILP32-LP64-NEXT: flw fs7, 108(a1) -; ILP32-LP64-NEXT: flw fs8, 112(a1) -; ILP32-LP64-NEXT: flw fs9, 116(a1) -; ILP32-LP64-NEXT: flw fs10, 120(a1) -; ILP32-LP64-NEXT: flw fs11, 124(a1) -; ILP32-LP64-NEXT: fsw fs11, 124(a1) -; ILP32-LP64-NEXT: fsw fs10, 120(a1) -; ILP32-LP64-NEXT: fsw fs9, 116(a1) -; ILP32-LP64-NEXT: fsw fs8, 112(a1) +; ILP32-LP64-NEXT: flw fs8, 124(a1) +; ILP32-LP64-NEXT: flw fs9, 120(a1) +; ILP32-LP64-NEXT: flw fs10, 116(a1) +; ILP32-LP64-NEXT: flw fs11, 112(a1) +; ILP32-LP64-NEXT: fsw fs8, 124(a1) +; ILP32-LP64-NEXT: fsw fs9, 120(a1) +; ILP32-LP64-NEXT: fsw fs10, 116(a1) +; ILP32-LP64-NEXT: fsw fs11, 112(a1) ; ILP32-LP64-NEXT: fsw fs7, 108(a1) ; ILP32-LP64-NEXT: fsw fs6, 104(a1) ; ILP32-LP64-NEXT: fsw fs5, 100(a1) @@ -106,6 +106,7 @@ ; ILP32F-LP64F-NEXT: fsw fs10, 4(sp) ; ILP32F-LP64F-NEXT: fsw fs11, 0(sp) ; ILP32F-LP64F-NEXT: lui a0, %hi(var) +; ILP32F-LP64F-NEXT: flw ft0, %lo(var)(a0) ; ILP32F-LP64F-NEXT: addi a1, a0, %lo(var) ; ; ILP32D-LP64D-LABEL: callee: @@ -124,6 +125,7 @@ ; ILP32D-LP64D-NEXT: fsd fs10, 8(sp) ; ILP32D-LP64D-NEXT: fsd fs11, 0(sp) ; ILP32D-LP64D-NEXT: lui a0, %hi(var) +; ILP32D-LP64D-NEXT: flw ft0, %lo(var)(a0) ; ILP32D-LP64D-NEXT: addi a1, a0, %lo(var) %val = load [32 x float], [32 x float]* @var store volatile [32 x float] %val, [32 x float]* @var diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -19,8 +19,8 @@ ; ILP32-LP64-LABEL: callee: ; ILP32-LP64: # %bb.0: ; ILP32-LP64-NEXT: lui a0, %hi(var) -; ILP32-LP64-NEXT: addi a1, a0, %lo(var) ; ILP32-LP64-NEXT: fld ft0, %lo(var)(a0) +; ILP32-LP64-NEXT: addi a1, a0, %lo(var) ; ILP32-LP64-NEXT: fld ft1, 8(a1) ; ILP32-LP64-NEXT: fld ft2, 16(a1) ; ILP32-LP64-NEXT: fld ft3, 24(a1) @@ -48,14 +48,14 @@ ; ILP32-LP64-NEXT: fld fs5, 200(a1) ; ILP32-LP64-NEXT: fld fs6, 208(a1) ; ILP32-LP64-NEXT: fld fs7, 216(a1) -; ILP32-LP64-NEXT: fld fs8, 224(a1) -; ILP32-LP64-NEXT: fld fs9, 232(a1) -; ILP32-LP64-NEXT: fld fs10, 240(a1) -; ILP32-LP64-NEXT: fld fs11, 248(a1) -; ILP32-LP64-NEXT: fsd fs11, 248(a1) -; ILP32-LP64-NEXT: fsd fs10, 240(a1) -; ILP32-LP64-NEXT: fsd fs9, 232(a1) -; ILP32-LP64-NEXT: fsd fs8, 224(a1) +; ILP32-LP64-NEXT: fld fs8, 248(a1) +; ILP32-LP64-NEXT: fld fs9, 240(a1) +; ILP32-LP64-NEXT: fld fs10, 232(a1) +; ILP32-LP64-NEXT: fld fs11, 224(a1) +; ILP32-LP64-NEXT: fsd fs8, 248(a1) +; ILP32-LP64-NEXT: fsd fs9, 240(a1) +; ILP32-LP64-NEXT: fsd fs10, 232(a1) +; ILP32-LP64-NEXT: fsd fs11, 224(a1) ; ILP32-LP64-NEXT: fsd fs7, 216(a1) ; ILP32-LP64-NEXT: fsd fs6, 208(a1) ; ILP32-LP64-NEXT: fsd fs5, 200(a1) @@ -102,6 +102,7 @@ ; ILP32D-LP64D-NEXT: fsd fs10, 8(sp) ; ILP32D-LP64D-NEXT: fsd fs11, 0(sp) ; ILP32D-LP64D-NEXT: lui a0, %hi(var) +; ILP32D-LP64D-NEXT: fld ft0, %lo(var)(a0) ; ILP32D-LP64D-NEXT: addi a1, a0, %lo(var) %val = load [32 x double], [32 x double]* @var store volatile [32 x double] %val, [32 x double]* @var diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -41,7 +41,9 @@ ; RV32I-NEXT: sw s10, 36(sp) ; RV32I-NEXT: sw s11, 32(sp) ; RV32I-NEXT: lui a0, %hi(var) -; RV32I-NEXT: addi a1, a0, %lo(var) +; RV32I-NEXT: lw a1, %lo(var)(a0) +; RV32I-NEXT: sw a1, 28(sp) +; RV32I-NEXT: addi a2, a0, %lo(var) ; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: @@ -61,7 +63,9 @@ ; RV32I-WITH-FP-NEXT: sw s11, 28(sp) ; RV32I-WITH-FP-NEXT: addi s0, sp, 80 ; RV32I-WITH-FP-NEXT: lui a0, %hi(var) -; RV32I-WITH-FP-NEXT: addi a1, a0, %lo(var) +; RV32I-WITH-FP-NEXT: lw a1, %lo(var)(a0) +; RV32I-WITH-FP-NEXT: sw a1, -56(s0) +; RV32I-WITH-FP-NEXT: addi a2, a0, %lo(var) ; ; RV64I-LABEL: callee: ; RV64I: # %bb.0: @@ -79,7 +83,9 @@ ; RV64I-NEXT: sd s10, 56(sp) ; RV64I-NEXT: sd s11, 48(sp) ; RV64I-NEXT: lui a0, %hi(var) -; RV64I-NEXT: addi a1, a0, %lo(var) +; RV64I-NEXT: lw a1, %lo(var)(a0) +; RV64I-NEXT: sd a1, 40(sp) +; RV64I-NEXT: addi a2, a0, %lo(var) ; ; RV64I-WITH-FP-LABEL: callee: ; RV64I-WITH-FP: # %bb.0: @@ -99,7 +105,9 @@ ; RV64I-WITH-FP-NEXT: sd s11, 56(sp) ; RV64I-WITH-FP-NEXT: addi s0, sp, 160 ; RV64I-WITH-FP-NEXT: lui a0, %hi(var) -; RV64I-WITH-FP-NEXT: addi a1, a0, %lo(var) +; RV64I-WITH-FP-NEXT: lw a1, %lo(var)(a0) +; RV64I-WITH-FP-NEXT: sd a1, -112(s0) +; RV64I-WITH-FP-NEXT: addi a2, a0, %lo(var) %val = load [32 x i32], [32 x i32]* @var store volatile [32 x i32] %val, [32 x i32]* @var ret void @@ -111,36 +119,41 @@ define void @caller() nounwind { ; RV32I-LABEL: caller: ; RV32I: lui a0, %hi(var) -; RV32I-NEXT: addi s1, a0, %lo(var) +; RV32I-NEXT: lw a1, %lo(var)(a0) +; RV32I-NEXT: sw a1, 88(sp) +; RV32I-NEXT: addi s0, a0, %lo(var) + ; RV32I: sw a0, 8(sp) -; RV32I-NEXT: lw s2, 84(s1) -; RV32I-NEXT: lw s3, 88(s1) -; RV32I-NEXT: lw s4, 92(s1) -; RV32I-NEXT: lw s5, 96(s1) -; RV32I-NEXT: lw s6, 100(s1) -; RV32I-NEXT: lw s7, 104(s1) -; RV32I-NEXT: lw s8, 108(s1) -; RV32I-NEXT: lw s9, 112(s1) -; RV32I-NEXT: lw s10, 116(s1) -; RV32I-NEXT: lw s11, 120(s1) -; RV32I-NEXT: lw s0, 124(s1) +; RV32I-NEXT: lw s2, 84(s0) +; RV32I-NEXT: lw s3, 88(s0) +; RV32I-NEXT: lw s4, 92(s0) +; RV32I-NEXT: lw s5, 96(s0) +; RV32I-NEXT: lw s6, 100(s0) +; RV32I-NEXT: lw s7, 104(s0) +; RV32I-NEXT: lw s8, 108(s0) +; RV32I-NEXT: lw s9, 112(s0) +; RV32I-NEXT: lw s10, 116(s0) +; RV32I-NEXT: lw s11, 120(s0) +; RV32I-NEXT: lw s1, 124(s0) ; RV32I-NEXT: call callee -; RV32I-NEXT: sw s0, 124(s1) -; RV32I-NEXT: sw s11, 120(s1) -; RV32I-NEXT: sw s10, 116(s1) -; RV32I-NEXT: sw s9, 112(s1) -; RV32I-NEXT: sw s8, 108(s1) -; RV32I-NEXT: sw s7, 104(s1) -; RV32I-NEXT: sw s6, 100(s1) -; RV32I-NEXT: sw s5, 96(s1) -; RV32I-NEXT: sw s4, 92(s1) -; RV32I-NEXT: sw s3, 88(s1) -; RV32I-NEXT: sw s2, 84(s1) +; RV32I-NEXT: sw s1, 124(s0) +; RV32I-NEXT: sw s11, 120(s0) +; RV32I-NEXT: sw s10, 116(s0) +; RV32I-NEXT: sw s9, 112(s0) +; RV32I-NEXT: sw s8, 108(s0) +; RV32I-NEXT: sw s7, 104(s0) +; RV32I-NEXT: sw s6, 100(s0) +; RV32I-NEXT: sw s5, 96(s0) +; RV32I-NEXT: sw s4, 92(s0) +; RV32I-NEXT: sw s3, 88(s0) +; RV32I-NEXT: sw s2, 84(s0) ; RV32I-NEXT: lw a0, 8(sp) ; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: addi s0, sp, 144 ; RV32I-WITH-FP-NEXT: lui a0, %hi(var) +; RV32I-WITH-FP-NEXT: lw a1, %lo(var)(a0) +; RV32I-WITH-FP-NEXT: sw a1, -56(s0) ; RV32I-WITH-FP-NEXT: addi s1, a0, %lo(var) ; RV32I-WITH-FP: sw a0, -140(s0) ; RV32I-WITH-FP-NEXT: lw s5, 88(s1) @@ -168,36 +181,40 @@ ; ; RV64I-LABEL: caller: ; RV64I: lui a0, %hi(var) -; RV64I-NEXT: addi s1, a0, %lo(var) +; RV64I-NEXT: lw a1, %lo(var)(a0) +; RV64I-NEXT: sd a1, 160(sp) +; RV64I-NEXT: addi s0, a0, %lo(var) ; RV64I: sd a0, 0(sp) -; RV64I-NEXT: lw s2, 84(s1) -; RV64I-NEXT: lw s3, 88(s1) -; RV64I-NEXT: lw s4, 92(s1) -; RV64I-NEXT: lw s5, 96(s1) -; RV64I-NEXT: lw s6, 100(s1) -; RV64I-NEXT: lw s7, 104(s1) -; RV64I-NEXT: lw s8, 108(s1) -; RV64I-NEXT: lw s9, 112(s1) -; RV64I-NEXT: lw s10, 116(s1) -; RV64I-NEXT: lw s11, 120(s1) -; RV64I-NEXT: lw s0, 124(s1) +; RV64I-NEXT: lw s2, 84(s0) +; RV64I-NEXT: lw s3, 88(s0) +; RV64I-NEXT: lw s4, 92(s0) +; RV64I-NEXT: lw s5, 96(s0) +; RV64I-NEXT: lw s6, 100(s0) +; RV64I-NEXT: lw s7, 104(s0) +; RV64I-NEXT: lw s8, 108(s0) +; RV64I-NEXT: lw s9, 112(s0) +; RV64I-NEXT: lw s10, 116(s0) +; RV64I-NEXT: lw s11, 120(s0) +; RV64I-NEXT: lw s1, 124(s0) ; RV64I-NEXT: call callee -; RV64I-NEXT: sw s0, 124(s1) -; RV64I-NEXT: sw s11, 120(s1) -; RV64I-NEXT: sw s10, 116(s1) -; RV64I-NEXT: sw s9, 112(s1) -; RV64I-NEXT: sw s8, 108(s1) -; RV64I-NEXT: sw s7, 104(s1) -; RV64I-NEXT: sw s6, 100(s1) -; RV64I-NEXT: sw s5, 96(s1) -; RV64I-NEXT: sw s4, 92(s1) -; RV64I-NEXT: sw s3, 88(s1) -; RV64I-NEXT: sw s2, 84(s1) +; RV64I-NEXT: sw s1, 124(s0) +; RV64I-NEXT: sw s11, 120(s0) +; RV64I-NEXT: sw s10, 116(s0) +; RV64I-NEXT: sw s9, 112(s0) +; RV64I-NEXT: sw s8, 108(s0) +; RV64I-NEXT: sw s7, 104(s0) +; RV64I-NEXT: sw s6, 100(s0) +; RV64I-NEXT: sw s5, 96(s0) +; RV64I-NEXT: sw s4, 92(s0) +; RV64I-NEXT: sw s3, 88(s0) +; RV64I-NEXT: sw s2, 84(s0) ; RV64I-NEXT: ld a0, 0(sp) ; ; RV64I-WITH-FP-LABEL: caller: ; RV64I-WITH-FP: addi s0, sp, 288 ; RV64I-WITH-FP-NEXT: lui a0, %hi(var) +; RV64I-WITH-FP-NEXT: lw a1, %lo(var)(a0) +; RV64I-WITH-FP-NEXT: sd a1, -112(s0) ; RV64I-WITH-FP-NEXT: addi s1, a0, %lo(var) ; RV64I-WITH-FP: sd a0, -280(s0) ; RV64I-WITH-FP-NEXT: lw s5, 88(s1) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -61,8 +61,8 @@ ; RV32I-FPELIM-NEXT: addi sp, sp, -16 ; RV32I-FPELIM-NEXT: sw ra, 12(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 1 -; RV32I-FPELIM-NEXT: mv a1, zero ; RV32I-FPELIM-NEXT: lui a2, 262144 +; RV32I-FPELIM-NEXT: mv a1, zero ; RV32I-FPELIM-NEXT: call callee_double_in_regs ; RV32I-FPELIM-NEXT: lw ra, 12(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 16 @@ -75,8 +75,8 @@ ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 -; RV32I-WITHFP-NEXT: mv a1, zero ; RV32I-WITHFP-NEXT: lui a2, 262144 +; RV32I-WITHFP-NEXT: mv a1, zero ; RV32I-WITHFP-NEXT: call callee_double_in_regs ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) @@ -94,14 +94,14 @@ ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: lw a1, 0(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: lw a1, 8(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: lw a1, 16(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: lw a1, 20(sp) +; RV32I-FPELIM-NEXT: lw a2, 0(sp) +; RV32I-FPELIM-NEXT: lw a3, 8(sp) +; RV32I-FPELIM-NEXT: lw a4, 16(sp) +; RV32I-FPELIM-NEXT: add a0, a0, a7 +; RV32I-FPELIM-NEXT: add a0, a0, a2 +; RV32I-FPELIM-NEXT: add a0, a0, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a4 ; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: ret ; @@ -112,14 +112,14 @@ ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: lw a1, 0(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: lw a1, 8(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: lw a1, 16(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw a1, 20(s0) +; RV32I-WITHFP-NEXT: lw a2, 0(s0) +; RV32I-WITHFP-NEXT: lw a3, 8(s0) +; RV32I-WITHFP-NEXT: lw a4, 16(s0) +; RV32I-WITHFP-NEXT: add a0, a0, a7 +; RV32I-WITHFP-NEXT: add a0, a0, a2 +; RV32I-WITHFP-NEXT: add a0, a0, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a4 ; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) @@ -169,8 +169,7 @@ ; RV32I-FPELIM-NEXT: addi a0, a0, -328 ; RV32I-FPELIM-NEXT: sw a0, 36(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 -; RV32I-FPELIM-NEXT: addi a0, a0, 1311 -; RV32I-FPELIM-NEXT: sw a0, 32(sp) +; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 @@ -180,6 +179,7 @@ ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 +; RV32I-FPELIM-NEXT: sw t0, 32(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -215,8 +215,7 @@ ; RV32I-WITHFP-NEXT: addi a0, a0, -328 ; RV32I-WITHFP-NEXT: sw a0, -28(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 -; RV32I-WITHFP-NEXT: addi a0, a0, 1311 -; RV32I-WITHFP-NEXT: sw a0, -32(s0) +; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 @@ -226,6 +225,7 @@ ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 +; RV32I-WITHFP-NEXT: sw t0, -32(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw s0, 56(sp) ; RV32I-WITHFP-NEXT: lw ra, 60(sp) @@ -241,8 +241,8 @@ define double @callee_small_scalar_ret() nounwind { ; RV32I-FPELIM-LABEL: callee_small_scalar_ret: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: mv a0, zero ; RV32I-FPELIM-NEXT: lui a1, 261888 +; RV32I-FPELIM-NEXT: mv a0, zero ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_small_scalar_ret: @@ -251,8 +251,8 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: mv a0, zero ; RV32I-WITHFP-NEXT: lui a1, 261888 +; RV32I-WITHFP-NEXT: mv a0, zero ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -82,22 +82,22 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) nounwind { ; RV32I-FPELIM-LABEL: callee_many_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw t0, 0(sp) -; RV32I-FPELIM-NEXT: xor a4, a4, t0 -; RV32I-FPELIM-NEXT: xor a3, a3, a7 -; RV32I-FPELIM-NEXT: or a3, a3, a4 -; RV32I-FPELIM-NEXT: lui a4, 16 -; RV32I-FPELIM-NEXT: addi a4, a4, -1 -; RV32I-FPELIM-NEXT: and a1, a1, a4 -; RV32I-FPELIM-NEXT: andi a0, a0, 255 -; RV32I-FPELIM-NEXT: add a0, a0, a1 +; RV32I-FPELIM-NEXT: lw t0, 4(sp) +; RV32I-FPELIM-NEXT: lw t1, 0(sp) +; RV32I-FPELIM-NEXT: andi t2, a0, 255 +; RV32I-FPELIM-NEXT: lui a0, 16 +; RV32I-FPELIM-NEXT: addi a0, a0, -1 +; RV32I-FPELIM-NEXT: and a0, a1, a0 +; RV32I-FPELIM-NEXT: add a0, t2, a0 ; RV32I-FPELIM-NEXT: add a0, a0, a2 -; RV32I-FPELIM-NEXT: seqz a1, a3 +; RV32I-FPELIM-NEXT: xor a1, a4, t1 +; RV32I-FPELIM-NEXT: xor a2, a3, a7 +; RV32I-FPELIM-NEXT: or a1, a2, a1 +; RV32I-FPELIM-NEXT: seqz a1, a1 ; RV32I-FPELIM-NEXT: add a0, a1, a0 ; RV32I-FPELIM-NEXT: add a0, a0, a5 ; RV32I-FPELIM-NEXT: add a0, a0, a6 -; RV32I-FPELIM-NEXT: lw a1, 4(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 +; RV32I-FPELIM-NEXT: add a0, a0, t0 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_many_scalars: @@ -106,22 +106,22 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw t0, 0(s0) -; RV32I-WITHFP-NEXT: xor a4, a4, t0 -; RV32I-WITHFP-NEXT: xor a3, a3, a7 -; RV32I-WITHFP-NEXT: or a3, a3, a4 -; RV32I-WITHFP-NEXT: lui a4, 16 -; RV32I-WITHFP-NEXT: addi a4, a4, -1 -; RV32I-WITHFP-NEXT: and a1, a1, a4 -; RV32I-WITHFP-NEXT: andi a0, a0, 255 -; RV32I-WITHFP-NEXT: add a0, a0, a1 +; RV32I-WITHFP-NEXT: lw t0, 4(s0) +; RV32I-WITHFP-NEXT: lw t1, 0(s0) +; RV32I-WITHFP-NEXT: andi t2, a0, 255 +; RV32I-WITHFP-NEXT: lui a0, 16 +; RV32I-WITHFP-NEXT: addi a0, a0, -1 +; RV32I-WITHFP-NEXT: and a0, a1, a0 +; RV32I-WITHFP-NEXT: add a0, t2, a0 ; RV32I-WITHFP-NEXT: add a0, a0, a2 -; RV32I-WITHFP-NEXT: seqz a1, a3 +; RV32I-WITHFP-NEXT: xor a1, a4, t1 +; RV32I-WITHFP-NEXT: xor a2, a3, a7 +; RV32I-WITHFP-NEXT: or a1, a2, a1 +; RV32I-WITHFP-NEXT: seqz a1, a1 ; RV32I-WITHFP-NEXT: add a0, a1, a0 ; RV32I-WITHFP-NEXT: add a0, a0, a5 ; RV32I-WITHFP-NEXT: add a0, a0, a6 -; RV32I-WITHFP-NEXT: lw a1, 4(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 +; RV32I-WITHFP-NEXT: add a0, a0, t0 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -146,15 +146,15 @@ ; RV32I-FPELIM-NEXT: sw ra, 12(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 8 ; RV32I-FPELIM-NEXT: sw a0, 4(sp) -; RV32I-FPELIM-NEXT: sw zero, 0(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 2 ; RV32I-FPELIM-NEXT: addi a2, zero, 3 ; RV32I-FPELIM-NEXT: addi a3, zero, 4 -; RV32I-FPELIM-NEXT: mv a4, zero ; RV32I-FPELIM-NEXT: addi a5, zero, 5 ; RV32I-FPELIM-NEXT: addi a6, zero, 6 ; RV32I-FPELIM-NEXT: addi a7, zero, 7 +; RV32I-FPELIM-NEXT: sw zero, 0(sp) +; RV32I-FPELIM-NEXT: mv a4, zero ; RV32I-FPELIM-NEXT: call callee_many_scalars ; RV32I-FPELIM-NEXT: lw ra, 12(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 16 @@ -168,15 +168,15 @@ ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: addi a0, zero, 8 ; RV32I-WITHFP-NEXT: sw a0, 4(sp) -; RV32I-WITHFP-NEXT: sw zero, 0(sp) ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 2 ; RV32I-WITHFP-NEXT: addi a2, zero, 3 ; RV32I-WITHFP-NEXT: addi a3, zero, 4 -; RV32I-WITHFP-NEXT: mv a4, zero ; RV32I-WITHFP-NEXT: addi a5, zero, 5 ; RV32I-WITHFP-NEXT: addi a6, zero, 6 ; RV32I-WITHFP-NEXT: addi a7, zero, 7 +; RV32I-WITHFP-NEXT: sw zero, 0(sp) +; RV32I-WITHFP-NEXT: mv a4, zero ; RV32I-WITHFP-NEXT: call callee_many_scalars ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) @@ -192,20 +192,20 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-LABEL: callee_large_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw a2, 12(a1) -; RV32I-FPELIM-NEXT: lw a3, 12(a0) -; RV32I-FPELIM-NEXT: xor a2, a3, a2 -; RV32I-FPELIM-NEXT: lw a3, 4(a1) -; RV32I-FPELIM-NEXT: lw a4, 4(a0) -; RV32I-FPELIM-NEXT: xor a3, a4, a3 +; RV32I-FPELIM-NEXT: lw a6, 0(a1) +; RV32I-FPELIM-NEXT: lw a7, 0(a0) +; RV32I-FPELIM-NEXT: lw a4, 4(a1) +; RV32I-FPELIM-NEXT: lw a5, 12(a1) +; RV32I-FPELIM-NEXT: lw a2, 12(a0) +; RV32I-FPELIM-NEXT: lw a3, 4(a0) +; RV32I-FPELIM-NEXT: lw a1, 8(a1) +; RV32I-FPELIM-NEXT: lw a0, 8(a0) +; RV32I-FPELIM-NEXT: xor a2, a2, a5 +; RV32I-FPELIM-NEXT: xor a3, a3, a4 ; RV32I-FPELIM-NEXT: or a2, a3, a2 -; RV32I-FPELIM-NEXT: lw a3, 8(a1) -; RV32I-FPELIM-NEXT: lw a4, 8(a0) -; RV32I-FPELIM-NEXT: xor a3, a4, a3 -; RV32I-FPELIM-NEXT: lw a1, 0(a1) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) ; RV32I-FPELIM-NEXT: xor a0, a0, a1 -; RV32I-FPELIM-NEXT: or a0, a0, a3 +; RV32I-FPELIM-NEXT: xor a1, a7, a6 +; RV32I-FPELIM-NEXT: or a0, a1, a0 ; RV32I-FPELIM-NEXT: or a0, a0, a2 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret @@ -216,20 +216,20 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw a2, 12(a1) -; RV32I-WITHFP-NEXT: lw a3, 12(a0) -; RV32I-WITHFP-NEXT: xor a2, a3, a2 -; RV32I-WITHFP-NEXT: lw a3, 4(a1) -; RV32I-WITHFP-NEXT: lw a4, 4(a0) -; RV32I-WITHFP-NEXT: xor a3, a4, a3 +; RV32I-WITHFP-NEXT: lw a6, 0(a1) +; RV32I-WITHFP-NEXT: lw a7, 0(a0) +; RV32I-WITHFP-NEXT: lw a4, 4(a1) +; RV32I-WITHFP-NEXT: lw a5, 12(a1) +; RV32I-WITHFP-NEXT: lw a2, 12(a0) +; RV32I-WITHFP-NEXT: lw a3, 4(a0) +; RV32I-WITHFP-NEXT: lw a1, 8(a1) +; RV32I-WITHFP-NEXT: lw a0, 8(a0) +; RV32I-WITHFP-NEXT: xor a2, a2, a5 +; RV32I-WITHFP-NEXT: xor a3, a3, a4 ; RV32I-WITHFP-NEXT: or a2, a3, a2 -; RV32I-WITHFP-NEXT: lw a3, 8(a1) -; RV32I-WITHFP-NEXT: lw a4, 8(a0) -; RV32I-WITHFP-NEXT: xor a3, a4, a3 -; RV32I-WITHFP-NEXT: lw a1, 0(a1) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) ; RV32I-WITHFP-NEXT: xor a0, a0, a1 -; RV32I-WITHFP-NEXT: or a0, a0, a3 +; RV32I-WITHFP-NEXT: xor a1, a7, a6 +; RV32I-WITHFP-NEXT: or a0, a1, a0 ; RV32I-WITHFP-NEXT: or a0, a0, a2 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) @@ -255,10 +255,10 @@ ; RV32I-FPELIM-NEXT: sw zero, 36(sp) ; RV32I-FPELIM-NEXT: sw zero, 32(sp) ; RV32I-FPELIM-NEXT: sw zero, 28(sp) -; RV32I-FPELIM-NEXT: addi a0, zero, 1 -; RV32I-FPELIM-NEXT: sw a0, 24(sp) +; RV32I-FPELIM-NEXT: addi a2, zero, 1 ; RV32I-FPELIM-NEXT: addi a0, sp, 24 ; RV32I-FPELIM-NEXT: mv a1, sp +; RV32I-FPELIM-NEXT: sw a2, 24(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars ; RV32I-FPELIM-NEXT: lw ra, 44(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 48 @@ -278,10 +278,10 @@ ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) -; RV32I-WITHFP-NEXT: addi a0, zero, 1 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: addi a2, zero, 1 ; RV32I-WITHFP-NEXT: addi a0, s0, -24 ; RV32I-WITHFP-NEXT: addi a1, s0, -48 +; RV32I-WITHFP-NEXT: sw a2, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars ; RV32I-WITHFP-NEXT: lw s0, 40(sp) ; RV32I-WITHFP-NEXT: lw ra, 44(sp) @@ -299,20 +299,20 @@ ; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 4(sp) -; RV32I-FPELIM-NEXT: lw a1, 12(a0) -; RV32I-FPELIM-NEXT: lw a2, 12(a7) -; RV32I-FPELIM-NEXT: xor a1, a2, a1 -; RV32I-FPELIM-NEXT: lw a2, 4(a0) -; RV32I-FPELIM-NEXT: lw a3, 4(a7) -; RV32I-FPELIM-NEXT: xor a2, a3, a2 -; RV32I-FPELIM-NEXT: or a1, a2, a1 -; RV32I-FPELIM-NEXT: lw a2, 8(a0) -; RV32I-FPELIM-NEXT: lw a3, 8(a7) -; RV32I-FPELIM-NEXT: xor a2, a3, a2 -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: lw a3, 0(a7) -; RV32I-FPELIM-NEXT: xor a0, a3, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a2 +; RV32I-FPELIM-NEXT: lw a6, 0(a0) +; RV32I-FPELIM-NEXT: lw t0, 0(a7) +; RV32I-FPELIM-NEXT: lw a3, 4(a0) +; RV32I-FPELIM-NEXT: lw a4, 12(a0) +; RV32I-FPELIM-NEXT: lw a5, 12(a7) +; RV32I-FPELIM-NEXT: lw a1, 4(a7) +; RV32I-FPELIM-NEXT: lw a0, 8(a0) +; RV32I-FPELIM-NEXT: lw a2, 8(a7) +; RV32I-FPELIM-NEXT: xor a4, a5, a4 +; RV32I-FPELIM-NEXT: xor a1, a1, a3 +; RV32I-FPELIM-NEXT: or a1, a1, a4 +; RV32I-FPELIM-NEXT: xor a0, a2, a0 +; RV32I-FPELIM-NEXT: xor a2, t0, a6 +; RV32I-FPELIM-NEXT: or a0, a2, a0 ; RV32I-FPELIM-NEXT: or a0, a0, a1 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret @@ -324,20 +324,20 @@ ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 4(s0) -; RV32I-WITHFP-NEXT: lw a1, 12(a0) -; RV32I-WITHFP-NEXT: lw a2, 12(a7) -; RV32I-WITHFP-NEXT: xor a1, a2, a1 -; RV32I-WITHFP-NEXT: lw a2, 4(a0) -; RV32I-WITHFP-NEXT: lw a3, 4(a7) -; RV32I-WITHFP-NEXT: xor a2, a3, a2 -; RV32I-WITHFP-NEXT: or a1, a2, a1 -; RV32I-WITHFP-NEXT: lw a2, 8(a0) -; RV32I-WITHFP-NEXT: lw a3, 8(a7) -; RV32I-WITHFP-NEXT: xor a2, a3, a2 -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: lw a3, 0(a7) -; RV32I-WITHFP-NEXT: xor a0, a3, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a2 +; RV32I-WITHFP-NEXT: lw a6, 0(a0) +; RV32I-WITHFP-NEXT: lw t0, 0(a7) +; RV32I-WITHFP-NEXT: lw a3, 4(a0) +; RV32I-WITHFP-NEXT: lw a4, 12(a0) +; RV32I-WITHFP-NEXT: lw a5, 12(a7) +; RV32I-WITHFP-NEXT: lw a1, 4(a7) +; RV32I-WITHFP-NEXT: lw a0, 8(a0) +; RV32I-WITHFP-NEXT: lw a2, 8(a7) +; RV32I-WITHFP-NEXT: xor a4, a5, a4 +; RV32I-WITHFP-NEXT: xor a1, a1, a3 +; RV32I-WITHFP-NEXT: or a1, a1, a4 +; RV32I-WITHFP-NEXT: xor a0, a2, a0 +; RV32I-WITHFP-NEXT: xor a2, t0, a6 +; RV32I-WITHFP-NEXT: or a0, a2, a0 ; RV32I-WITHFP-NEXT: or a0, a0, a1 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) @@ -367,8 +367,7 @@ ; RV32I-FPELIM-NEXT: sw zero, 52(sp) ; RV32I-FPELIM-NEXT: sw zero, 48(sp) ; RV32I-FPELIM-NEXT: sw zero, 44(sp) -; RV32I-FPELIM-NEXT: addi a0, zero, 8 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: addi t0, zero, 8 ; RV32I-FPELIM-NEXT: addi a7, sp, 40 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 2 @@ -377,6 +376,7 @@ ; RV32I-FPELIM-NEXT: addi a4, zero, 5 ; RV32I-FPELIM-NEXT: addi a5, zero, 6 ; RV32I-FPELIM-NEXT: addi a6, zero, 7 +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-FPELIM-NEXT: lw ra, 60(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -400,8 +400,7 @@ ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) -; RV32I-WITHFP-NEXT: addi a0, zero, 8 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: addi t0, zero, 8 ; RV32I-WITHFP-NEXT: addi a7, s0, -24 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 2 @@ -410,6 +409,7 @@ ; RV32I-WITHFP-NEXT: addi a4, zero, 5 ; RV32I-WITHFP-NEXT: addi a5, zero, 6 ; RV32I-WITHFP-NEXT: addi a6, zero, 7 +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-WITHFP-NEXT: lw s0, 56(sp) ; RV32I-WITHFP-NEXT: lw ra, 60(sp) @@ -524,9 +524,9 @@ define i32 @callee_large_struct(%struct.large* byval align 4 %a) nounwind { ; RV32I-FPELIM-LABEL: callee_large_struct: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw a1, 12(a0) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: add a0, a0, a1 +; RV32I-FPELIM-NEXT: lw a1, 0(a0) +; RV32I-FPELIM-NEXT: lw a0, 12(a0) +; RV32I-FPELIM-NEXT: add a0, a1, a0 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_large_struct: @@ -535,9 +535,9 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw a1, 12(a0) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 +; RV32I-WITHFP-NEXT: lw a1, 0(a0) +; RV32I-WITHFP-NEXT: lw a0, 12(a0) +; RV32I-WITHFP-NEXT: add a0, a1, a0 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -557,16 +557,16 @@ ; RV32I-FPELIM-NEXT: sw ra, 44(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: sw a0, 24(sp) +; RV32I-FPELIM-NEXT: addi a1, zero, 2 +; RV32I-FPELIM-NEXT: sw a1, 28(sp) +; RV32I-FPELIM-NEXT: addi a2, zero, 3 +; RV32I-FPELIM-NEXT: sw a2, 32(sp) +; RV32I-FPELIM-NEXT: addi a3, zero, 4 +; RV32I-FPELIM-NEXT: sw a3, 36(sp) ; RV32I-FPELIM-NEXT: sw a0, 8(sp) -; RV32I-FPELIM-NEXT: addi a0, zero, 2 -; RV32I-FPELIM-NEXT: sw a0, 28(sp) -; RV32I-FPELIM-NEXT: sw a0, 12(sp) -; RV32I-FPELIM-NEXT: addi a0, zero, 3 -; RV32I-FPELIM-NEXT: sw a0, 32(sp) -; RV32I-FPELIM-NEXT: sw a0, 16(sp) -; RV32I-FPELIM-NEXT: addi a0, zero, 4 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) -; RV32I-FPELIM-NEXT: sw a0, 20(sp) +; RV32I-FPELIM-NEXT: sw a1, 12(sp) +; RV32I-FPELIM-NEXT: sw a2, 16(sp) +; RV32I-FPELIM-NEXT: sw a3, 20(sp) ; RV32I-FPELIM-NEXT: addi a0, sp, 8 ; RV32I-FPELIM-NEXT: call callee_large_struct ; RV32I-FPELIM-NEXT: lw ra, 44(sp) @@ -581,16 +581,16 @@ ; RV32I-WITHFP-NEXT: addi s0, sp, 48 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: addi a1, zero, 2 +; RV32I-WITHFP-NEXT: sw a1, -20(s0) +; RV32I-WITHFP-NEXT: addi a2, zero, 3 +; RV32I-WITHFP-NEXT: sw a2, -16(s0) +; RV32I-WITHFP-NEXT: addi a3, zero, 4 +; RV32I-WITHFP-NEXT: sw a3, -12(s0) ; RV32I-WITHFP-NEXT: sw a0, -40(s0) -; RV32I-WITHFP-NEXT: addi a0, zero, 2 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) -; RV32I-WITHFP-NEXT: sw a0, -36(s0) -; RV32I-WITHFP-NEXT: addi a0, zero, 3 -; RV32I-WITHFP-NEXT: sw a0, -16(s0) -; RV32I-WITHFP-NEXT: sw a0, -32(s0) -; RV32I-WITHFP-NEXT: addi a0, zero, 4 -; RV32I-WITHFP-NEXT: sw a0, -12(s0) -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw a1, -36(s0) +; RV32I-WITHFP-NEXT: sw a2, -32(s0) +; RV32I-WITHFP-NEXT: sw a3, -28(s0) ; RV32I-WITHFP-NEXT: addi a0, s0, -40 ; RV32I-WITHFP-NEXT: call callee_large_struct ; RV32I-WITHFP-NEXT: lw s0, 40(sp) @@ -619,14 +619,14 @@ ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: lw a1, 0(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: lw a1, 8(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: lw a1, 16(sp) -; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: lw a1, 20(sp) +; RV32I-FPELIM-NEXT: lw a2, 0(sp) +; RV32I-FPELIM-NEXT: lw a3, 8(sp) +; RV32I-FPELIM-NEXT: lw a4, 16(sp) +; RV32I-FPELIM-NEXT: add a0, a0, a7 +; RV32I-FPELIM-NEXT: add a0, a0, a2 +; RV32I-FPELIM-NEXT: add a0, a0, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a4 ; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: ret ; @@ -637,14 +637,14 @@ ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: lw a1, 0(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: lw a1, 8(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: lw a1, 16(s0) -; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw a1, 20(s0) +; RV32I-WITHFP-NEXT: lw a2, 0(s0) +; RV32I-WITHFP-NEXT: lw a3, 8(s0) +; RV32I-WITHFP-NEXT: lw a4, 16(s0) +; RV32I-WITHFP-NEXT: add a0, a0, a7 +; RV32I-WITHFP-NEXT: add a0, a0, a2 +; RV32I-WITHFP-NEXT: add a0, a0, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a4 ; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) @@ -690,8 +690,7 @@ ; RV32I-FPELIM-NEXT: addi a0, a0, -328 ; RV32I-FPELIM-NEXT: sw a0, 36(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 -; RV32I-FPELIM-NEXT: addi a0, a0, 1311 -; RV32I-FPELIM-NEXT: sw a0, 32(sp) +; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 @@ -701,6 +700,7 @@ ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 +; RV32I-FPELIM-NEXT: sw t0, 32(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -733,8 +733,7 @@ ; RV32I-WITHFP-NEXT: addi a0, a0, -328 ; RV32I-WITHFP-NEXT: sw a0, -28(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 -; RV32I-WITHFP-NEXT: addi a0, a0, 1311 -; RV32I-WITHFP-NEXT: sw a0, -32(s0) +; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 @@ -744,6 +743,7 @@ ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 +; RV32I-WITHFP-NEXT: sw t0, -32(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw s0, 56(sp) ; RV32I-WITHFP-NEXT: lw ra, 60(sp) @@ -787,17 +787,15 @@ ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -16 ; RV32I-FPELIM-NEXT: sw ra, 12(sp) -; RV32I-FPELIM-NEXT: sw s0, 8(sp) -; RV32I-FPELIM-NEXT: lui a0, 56 -; RV32I-FPELIM-NEXT: addi s0, a0, 580 ; RV32I-FPELIM-NEXT: call callee_small_scalar_ret -; RV32I-FPELIM-NEXT: xor a1, a1, s0 +; RV32I-FPELIM-NEXT: lui a2, 56 +; RV32I-FPELIM-NEXT: addi a2, a2, 580 +; RV32I-FPELIM-NEXT: xor a1, a1, a2 ; RV32I-FPELIM-NEXT: lui a2, 200614 ; RV32I-FPELIM-NEXT: addi a2, a2, 647 ; RV32I-FPELIM-NEXT: xor a0, a0, a2 ; RV32I-FPELIM-NEXT: or a0, a0, a1 ; RV32I-FPELIM-NEXT: seqz a0, a0 -; RV32I-FPELIM-NEXT: lw s0, 8(sp) ; RV32I-FPELIM-NEXT: lw ra, 12(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 16 ; RV32I-FPELIM-NEXT: ret @@ -807,18 +805,16 @@ ; RV32I-WITHFP-NEXT: addi sp, sp, -16 ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) -; RV32I-WITHFP-NEXT: sw s1, 4(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lui a0, 56 -; RV32I-WITHFP-NEXT: addi s1, a0, 580 ; RV32I-WITHFP-NEXT: call callee_small_scalar_ret -; RV32I-WITHFP-NEXT: xor a1, a1, s1 +; RV32I-WITHFP-NEXT: lui a2, 56 +; RV32I-WITHFP-NEXT: addi a2, a2, 580 +; RV32I-WITHFP-NEXT: xor a1, a1, a2 ; RV32I-WITHFP-NEXT: lui a2, 200614 ; RV32I-WITHFP-NEXT: addi a2, a2, 647 ; RV32I-WITHFP-NEXT: xor a0, a0, a2 ; RV32I-WITHFP-NEXT: or a0, a0, a1 ; RV32I-WITHFP-NEXT: seqz a0, a0 -; RV32I-WITHFP-NEXT: lw s1, 4(sp) ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -946,14 +942,14 @@ define void @callee_large_struct_ret(%struct.large* noalias sret %agg.result) nounwind { ; RV32I-FPELIM-LABEL: callee_large_struct_ret: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: addi a1, zero, 4 -; RV32I-FPELIM-NEXT: sw a1, 12(a0) -; RV32I-FPELIM-NEXT: addi a1, zero, 3 -; RV32I-FPELIM-NEXT: sw a1, 8(a0) -; RV32I-FPELIM-NEXT: addi a1, zero, 2 -; RV32I-FPELIM-NEXT: sw a1, 4(a0) ; RV32I-FPELIM-NEXT: addi a1, zero, 1 ; RV32I-FPELIM-NEXT: sw a1, 0(a0) +; RV32I-FPELIM-NEXT: addi a1, zero, 2 +; RV32I-FPELIM-NEXT: sw a1, 4(a0) +; RV32I-FPELIM-NEXT: addi a1, zero, 3 +; RV32I-FPELIM-NEXT: sw a1, 8(a0) +; RV32I-FPELIM-NEXT: addi a1, zero, 4 +; RV32I-FPELIM-NEXT: sw a1, 12(a0) ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_large_struct_ret: @@ -962,14 +958,14 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: addi a1, zero, 4 -; RV32I-WITHFP-NEXT: sw a1, 12(a0) -; RV32I-WITHFP-NEXT: addi a1, zero, 3 -; RV32I-WITHFP-NEXT: sw a1, 8(a0) -; RV32I-WITHFP-NEXT: addi a1, zero, 2 -; RV32I-WITHFP-NEXT: sw a1, 4(a0) ; RV32I-WITHFP-NEXT: addi a1, zero, 1 ; RV32I-WITHFP-NEXT: sw a1, 0(a0) +; RV32I-WITHFP-NEXT: addi a1, zero, 2 +; RV32I-WITHFP-NEXT: sw a1, 4(a0) +; RV32I-WITHFP-NEXT: addi a1, zero, 3 +; RV32I-WITHFP-NEXT: sw a1, 8(a0) +; RV32I-WITHFP-NEXT: addi a1, zero, 4 +; RV32I-WITHFP-NEXT: sw a1, 12(a0) ; RV32I-WITHFP-NEXT: lw s0, 8(sp) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -992,9 +988,9 @@ ; RV32I-FPELIM-NEXT: sw ra, 28(sp) ; RV32I-FPELIM-NEXT: addi a0, sp, 8 ; RV32I-FPELIM-NEXT: call callee_large_struct_ret -; RV32I-FPELIM-NEXT: lw a0, 20(sp) -; RV32I-FPELIM-NEXT: lw a1, 8(sp) -; RV32I-FPELIM-NEXT: add a0, a1, a0 +; RV32I-FPELIM-NEXT: lw a0, 8(sp) +; RV32I-FPELIM-NEXT: lw a1, 20(sp) +; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: lw ra, 28(sp) ; RV32I-FPELIM-NEXT: addi sp, sp, 32 ; RV32I-FPELIM-NEXT: ret @@ -1007,9 +1003,9 @@ ; RV32I-WITHFP-NEXT: addi s0, sp, 32 ; RV32I-WITHFP-NEXT: addi a0, s0, -24 ; RV32I-WITHFP-NEXT: call callee_large_struct_ret -; RV32I-WITHFP-NEXT: lw a0, -12(s0) -; RV32I-WITHFP-NEXT: lw a1, -24(s0) -; RV32I-WITHFP-NEXT: add a0, a1, a0 +; RV32I-WITHFP-NEXT: lw a0, -24(s0) +; RV32I-WITHFP-NEXT: lw a1, -12(s0) +; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw s0, 24(sp) ; RV32I-WITHFP-NEXT: lw ra, 28(sp) ; RV32I-WITHFP-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll @@ -107,15 +107,15 @@ ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -16 ; RV32I-FPELIM-NEXT: sw ra, 12(sp) -; RV32I-FPELIM-NEXT: lui a0, 264704 -; RV32I-FPELIM-NEXT: sw a0, 0(sp) +; RV32I-FPELIM-NEXT: lui a1, 264704 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 -; RV32I-FPELIM-NEXT: mv a1, zero ; RV32I-FPELIM-NEXT: addi a2, zero, 2 -; RV32I-FPELIM-NEXT: mv a3, zero ; RV32I-FPELIM-NEXT: addi a4, zero, 3 -; RV32I-FPELIM-NEXT: mv a5, zero ; RV32I-FPELIM-NEXT: addi a6, zero, 4 +; RV32I-FPELIM-NEXT: sw a1, 0(sp) +; RV32I-FPELIM-NEXT: mv a1, zero +; RV32I-FPELIM-NEXT: mv a3, zero +; RV32I-FPELIM-NEXT: mv a5, zero ; RV32I-FPELIM-NEXT: mv a7, zero ; RV32I-FPELIM-NEXT: call callee_float_on_stack ; RV32I-FPELIM-NEXT: lw ra, 12(sp) @@ -128,15 +128,15 @@ ; RV32I-WITHFP-NEXT: sw ra, 12(sp) ; RV32I-WITHFP-NEXT: sw s0, 8(sp) ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lui a0, 264704 -; RV32I-WITHFP-NEXT: sw a0, 0(sp) +; RV32I-WITHFP-NEXT: lui a1, 264704 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 -; RV32I-WITHFP-NEXT: mv a1, zero ; RV32I-WITHFP-NEXT: addi a2, zero, 2 -; RV32I-WITHFP-NEXT: mv a3, zero ; RV32I-WITHFP-NEXT: addi a4, zero, 3 -; RV32I-WITHFP-NEXT: mv a5, zero ; RV32I-WITHFP-NEXT: addi a6, zero, 4 +; RV32I-WITHFP-NEXT: sw a1, 0(sp) +; RV32I-WITHFP-NEXT: mv a1, zero +; RV32I-WITHFP-NEXT: mv a3, zero +; RV32I-WITHFP-NEXT: mv a5, zero ; RV32I-WITHFP-NEXT: mv a7, zero ; RV32I-WITHFP-NEXT: call callee_float_on_stack ; RV32I-WITHFP-NEXT: lw s0, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll @@ -38,9 +38,9 @@ define i32 @callee_double_in_fpr_exhausted_gprs(i64 %a, i64 %b, i64 %c, i64 %d, i32 %e, double %f) nounwind { ; RV32-ILP32D-LABEL: callee_double_in_fpr_exhausted_gprs: ; RV32-ILP32D: # %bb.0: -; RV32-ILP32D-NEXT: fcvt.w.d a0, fa0, rtz -; RV32-ILP32D-NEXT: lw a1, 0(sp) -; RV32-ILP32D-NEXT: add a0, a1, a0 +; RV32-ILP32D-NEXT: lw a0, 0(sp) +; RV32-ILP32D-NEXT: fcvt.w.d a1, fa0, rtz +; RV32-ILP32D-NEXT: add a0, a0, a1 ; RV32-ILP32D-NEXT: ret %f_fptosi = fptosi double %f to i32 %1 = add i32 %e, %f_fptosi @@ -52,18 +52,18 @@ ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) -; RV32-ILP32D-NEXT: addi a0, zero, 5 -; RV32-ILP32D-NEXT: sw a0, 0(sp) +; RV32-ILP32D-NEXT: addi a1, zero, 5 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI3_0) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI3_0) ; RV32-ILP32D-NEXT: fld fa0, 0(a0) ; RV32-ILP32D-NEXT: addi a0, zero, 1 -; RV32-ILP32D-NEXT: mv a1, zero ; RV32-ILP32D-NEXT: addi a2, zero, 2 -; RV32-ILP32D-NEXT: mv a3, zero ; RV32-ILP32D-NEXT: addi a4, zero, 3 -; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: addi a6, zero, 4 +; RV32-ILP32D-NEXT: sw a1, 0(sp) +; RV32-ILP32D-NEXT: mv a1, zero +; RV32-ILP32D-NEXT: mv a3, zero +; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: mv a7, zero ; RV32-ILP32D-NEXT: call callee_double_in_fpr_exhausted_gprs ; RV32-ILP32D-NEXT: lw ra, 12(sp) @@ -82,9 +82,9 @@ ; RV32-ILP32D-NEXT: sw a0, 8(sp) ; RV32-ILP32D-NEXT: sw a1, 12(sp) ; RV32-ILP32D-NEXT: fld ft0, 8(sp) -; RV32-ILP32D-NEXT: fcvt.w.d a0, ft0, rtz -; RV32-ILP32D-NEXT: fcvt.w.d a1, fa7, rtz -; RV32-ILP32D-NEXT: add a0, a1, a0 +; RV32-ILP32D-NEXT: fcvt.w.d a0, fa7, rtz +; RV32-ILP32D-NEXT: fcvt.w.d a1, ft0, rtz +; RV32-ILP32D-NEXT: add a0, a0, a1 ; RV32-ILP32D-NEXT: addi sp, sp, 16 ; RV32-ILP32D-NEXT: ret %h_fptosi = fptosi double %h to i32 @@ -100,21 +100,21 @@ ; RV32-ILP32D-NEXT: sw ra, 12(sp) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_0) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_0) -; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI5_1) -; RV32-ILP32D-NEXT: addi a1, a1, %lo(.LCPI5_1) -; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI5_2) -; RV32-ILP32D-NEXT: addi a2, a2, %lo(.LCPI5_2) -; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI5_3) -; RV32-ILP32D-NEXT: addi a3, a3, %lo(.LCPI5_3) -; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI5_4) -; RV32-ILP32D-NEXT: addi a4, a4, %lo(.LCPI5_4) -; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI5_5) -; RV32-ILP32D-NEXT: addi a5, a5, %lo(.LCPI5_5) -; RV32-ILP32D-NEXT: fld fa0, 0(a5) -; RV32-ILP32D-NEXT: fld fa1, 0(a4) -; RV32-ILP32D-NEXT: fld fa2, 0(a3) -; RV32-ILP32D-NEXT: fld fa3, 0(a2) -; RV32-ILP32D-NEXT: fld fa4, 0(a1) +; RV32-ILP32D-NEXT: fld fa0, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_1) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_1) +; RV32-ILP32D-NEXT: fld fa1, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_2) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_2) +; RV32-ILP32D-NEXT: fld fa2, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_3) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_3) +; RV32-ILP32D-NEXT: fld fa3, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_4) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_4) +; RV32-ILP32D-NEXT: fld fa4, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_5) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_5) ; RV32-ILP32D-NEXT: fld fa5, 0(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_6) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_6) @@ -122,8 +122,8 @@ ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_7) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI5_7) ; RV32-ILP32D-NEXT: fld fa7, 0(a0) -; RV32-ILP32D-NEXT: mv a0, zero ; RV32-ILP32D-NEXT: lui a1, 262688 +; RV32-ILP32D-NEXT: mv a0, zero ; RV32-ILP32D-NEXT: call callee_double_in_gpr_exhausted_fprs ; RV32-ILP32D-NEXT: lw ra, 12(sp) ; RV32-ILP32D-NEXT: addi sp, sp, 16 @@ -157,39 +157,39 @@ ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) -; RV32-ILP32D-NEXT: lui a0, 262816 -; RV32-ILP32D-NEXT: sw a0, 0(sp) +; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-ILP32D-NEXT: addi a6, a0, %lo(.LCPI7_0) -; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_1) -; RV32-ILP32D-NEXT: addi a1, a1, %lo(.LCPI7_1) -; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI7_2) -; RV32-ILP32D-NEXT: addi a2, a2, %lo(.LCPI7_2) -; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI7_3) -; RV32-ILP32D-NEXT: addi a3, a3, %lo(.LCPI7_3) -; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI7_4) -; RV32-ILP32D-NEXT: addi a4, a4, %lo(.LCPI7_4) -; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI7_5) -; RV32-ILP32D-NEXT: addi a5, a5, %lo(.LCPI7_5) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_0) +; RV32-ILP32D-NEXT: fld fa0, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_1) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_1) +; RV32-ILP32D-NEXT: fld fa1, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_2) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_2) +; RV32-ILP32D-NEXT: fld fa2, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_3) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_3) +; RV32-ILP32D-NEXT: fld fa3, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_4) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_4) +; RV32-ILP32D-NEXT: fld fa4, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_5) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_5) +; RV32-ILP32D-NEXT: fld fa5, 0(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_6) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_6) -; RV32-ILP32D-NEXT: fld fa0, 0(a0) -; RV32-ILP32D-NEXT: fld fa1, 0(a5) -; RV32-ILP32D-NEXT: fld fa2, 0(a4) -; RV32-ILP32D-NEXT: fld fa3, 0(a3) -; RV32-ILP32D-NEXT: fld fa4, 0(a2) -; RV32-ILP32D-NEXT: fld fa5, 0(a1) -; RV32-ILP32D-NEXT: fld fa6, 0(a6) +; RV32-ILP32D-NEXT: fld fa6, 0(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_7) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI7_7) ; RV32-ILP32D-NEXT: fld fa7, 0(a0) ; RV32-ILP32D-NEXT: addi a0, zero, 1 -; RV32-ILP32D-NEXT: mv a1, zero ; RV32-ILP32D-NEXT: addi a2, zero, 3 -; RV32-ILP32D-NEXT: mv a3, zero ; RV32-ILP32D-NEXT: addi a4, zero, 5 -; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: addi a6, zero, 7 +; RV32-ILP32D-NEXT: sw a1, 0(sp) +; RV32-ILP32D-NEXT: mv a1, zero +; RV32-ILP32D-NEXT: mv a3, zero +; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: mv a7, zero ; RV32-ILP32D-NEXT: call callee_double_in_gpr_and_stack_almost_exhausted_gprs_fprs ; RV32-ILP32D-NEXT: lw ra, 12(sp) @@ -223,38 +223,38 @@ ; RV32-ILP32D-NEXT: sw ra, 12(sp) ; RV32-ILP32D-NEXT: lui a0, 262816 ; RV32-ILP32D-NEXT: sw a0, 4(sp) -; RV32-ILP32D-NEXT: sw zero, 0(sp) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_0) -; RV32-ILP32D-NEXT: addi a6, a0, %lo(.LCPI9_0) -; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_1) -; RV32-ILP32D-NEXT: addi a1, a1, %lo(.LCPI9_1) -; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI9_2) -; RV32-ILP32D-NEXT: addi a2, a2, %lo(.LCPI9_2) -; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI9_3) -; RV32-ILP32D-NEXT: addi a3, a3, %lo(.LCPI9_3) -; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI9_4) -; RV32-ILP32D-NEXT: addi a4, a4, %lo(.LCPI9_4) -; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI9_5) -; RV32-ILP32D-NEXT: addi a5, a5, %lo(.LCPI9_5) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_0) +; RV32-ILP32D-NEXT: fld fa0, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_1) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_1) +; RV32-ILP32D-NEXT: fld fa1, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_2) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_2) +; RV32-ILP32D-NEXT: fld fa2, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_3) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_3) +; RV32-ILP32D-NEXT: fld fa3, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_4) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_4) +; RV32-ILP32D-NEXT: fld fa4, 0(a0) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_5) +; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_5) +; RV32-ILP32D-NEXT: fld fa5, 0(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_6) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_6) -; RV32-ILP32D-NEXT: fld fa0, 0(a0) -; RV32-ILP32D-NEXT: fld fa1, 0(a5) -; RV32-ILP32D-NEXT: fld fa2, 0(a4) -; RV32-ILP32D-NEXT: fld fa3, 0(a3) -; RV32-ILP32D-NEXT: fld fa4, 0(a2) -; RV32-ILP32D-NEXT: fld fa5, 0(a1) -; RV32-ILP32D-NEXT: fld fa6, 0(a6) +; RV32-ILP32D-NEXT: fld fa6, 0(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_7) ; RV32-ILP32D-NEXT: addi a0, a0, %lo(.LCPI9_7) ; RV32-ILP32D-NEXT: fld fa7, 0(a0) ; RV32-ILP32D-NEXT: addi a0, zero, 1 -; RV32-ILP32D-NEXT: mv a1, zero ; RV32-ILP32D-NEXT: addi a2, zero, 3 -; RV32-ILP32D-NEXT: mv a3, zero ; RV32-ILP32D-NEXT: addi a4, zero, 5 -; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: addi a6, zero, 7 +; RV32-ILP32D-NEXT: sw zero, 0(sp) +; RV32-ILP32D-NEXT: mv a1, zero +; RV32-ILP32D-NEXT: mv a3, zero +; RV32-ILP32D-NEXT: mv a5, zero ; RV32-ILP32D-NEXT: mv a7, zero ; RV32-ILP32D-NEXT: call callee_double_on_stack_exhausted_gprs_fprs ; RV32-ILP32D-NEXT: lw ra, 12(sp) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll @@ -41,9 +41,9 @@ define i32 @callee_float_in_fpr_exhausted_gprs(i64 %a, i64 %b, i64 %c, i64 %d, i32 %e, float %f) nounwind { ; RV32-ILP32FD-LABEL: callee_float_in_fpr_exhausted_gprs: ; RV32-ILP32FD: # %bb.0: -; RV32-ILP32FD-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ILP32FD-NEXT: lw a1, 0(sp) -; RV32-ILP32FD-NEXT: add a0, a1, a0 +; RV32-ILP32FD-NEXT: lw a0, 0(sp) +; RV32-ILP32FD-NEXT: fcvt.w.s a1, fa0, rtz +; RV32-ILP32FD-NEXT: add a0, a0, a1 ; RV32-ILP32FD-NEXT: ret %f_fptosi = fptosi float %f to i32 %1 = add i32 %e, %f_fptosi @@ -55,18 +55,18 @@ ; RV32-ILP32FD: # %bb.0: ; RV32-ILP32FD-NEXT: addi sp, sp, -16 ; RV32-ILP32FD-NEXT: sw ra, 12(sp) -; RV32-ILP32FD-NEXT: addi a0, zero, 5 -; RV32-ILP32FD-NEXT: sw a0, 0(sp) +; RV32-ILP32FD-NEXT: addi a1, zero, 5 ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI3_0) ; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI3_0) ; RV32-ILP32FD-NEXT: flw fa0, 0(a0) ; RV32-ILP32FD-NEXT: addi a0, zero, 1 -; RV32-ILP32FD-NEXT: mv a1, zero ; RV32-ILP32FD-NEXT: addi a2, zero, 2 -; RV32-ILP32FD-NEXT: mv a3, zero ; RV32-ILP32FD-NEXT: addi a4, zero, 3 -; RV32-ILP32FD-NEXT: mv a5, zero ; RV32-ILP32FD-NEXT: addi a6, zero, 4 +; RV32-ILP32FD-NEXT: sw a1, 0(sp) +; RV32-ILP32FD-NEXT: mv a1, zero +; RV32-ILP32FD-NEXT: mv a3, zero +; RV32-ILP32FD-NEXT: mv a5, zero ; RV32-ILP32FD-NEXT: mv a7, zero ; RV32-ILP32FD-NEXT: call callee_float_in_fpr_exhausted_gprs ; RV32-ILP32FD-NEXT: lw ra, 12(sp) @@ -81,10 +81,10 @@ define i32 @callee_float_in_gpr_exhausted_fprs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) nounwind { ; RV32-ILP32FD-LABEL: callee_float_in_gpr_exhausted_fprs: ; RV32-ILP32FD: # %bb.0: -; RV32-ILP32FD-NEXT: fcvt.w.s a1, fa7, rtz ; RV32-ILP32FD-NEXT: fmv.w.x ft0, a0 -; RV32-ILP32FD-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-ILP32FD-NEXT: add a0, a1, a0 +; RV32-ILP32FD-NEXT: fcvt.w.s a0, fa7, rtz +; RV32-ILP32FD-NEXT: fcvt.w.s a1, ft0, rtz +; RV32-ILP32FD-NEXT: add a0, a0, a1 ; RV32-ILP32FD-NEXT: ret %h_fptosi = fptosi float %h to i32 %i_fptosi = fptosi float %i to i32 @@ -99,21 +99,21 @@ ; RV32-ILP32FD-NEXT: sw ra, 12(sp) ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_0) ; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_0) -; RV32-ILP32FD-NEXT: lui a1, %hi(.LCPI5_1) -; RV32-ILP32FD-NEXT: addi a1, a1, %lo(.LCPI5_1) -; RV32-ILP32FD-NEXT: lui a2, %hi(.LCPI5_2) -; RV32-ILP32FD-NEXT: addi a2, a2, %lo(.LCPI5_2) -; RV32-ILP32FD-NEXT: lui a3, %hi(.LCPI5_3) -; RV32-ILP32FD-NEXT: addi a3, a3, %lo(.LCPI5_3) -; RV32-ILP32FD-NEXT: lui a4, %hi(.LCPI5_4) -; RV32-ILP32FD-NEXT: addi a4, a4, %lo(.LCPI5_4) -; RV32-ILP32FD-NEXT: lui a5, %hi(.LCPI5_5) -; RV32-ILP32FD-NEXT: addi a5, a5, %lo(.LCPI5_5) -; RV32-ILP32FD-NEXT: flw fa0, 0(a5) -; RV32-ILP32FD-NEXT: flw fa1, 0(a4) -; RV32-ILP32FD-NEXT: flw fa2, 0(a3) -; RV32-ILP32FD-NEXT: flw fa3, 0(a2) -; RV32-ILP32FD-NEXT: flw fa4, 0(a1) +; RV32-ILP32FD-NEXT: flw fa0, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_1) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_1) +; RV32-ILP32FD-NEXT: flw fa1, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_2) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_2) +; RV32-ILP32FD-NEXT: flw fa2, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_3) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_3) +; RV32-ILP32FD-NEXT: flw fa3, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_4) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_4) +; RV32-ILP32FD-NEXT: flw fa4, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_5) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_5) ; RV32-ILP32FD-NEXT: flw fa5, 0(a0) ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI5_6) ; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI5_6) @@ -151,39 +151,39 @@ ; RV32-ILP32FD: # %bb.0: ; RV32-ILP32FD-NEXT: addi sp, sp, -16 ; RV32-ILP32FD-NEXT: sw ra, 12(sp) -; RV32-ILP32FD-NEXT: lui a0, 267520 -; RV32-ILP32FD-NEXT: sw a0, 0(sp) +; RV32-ILP32FD-NEXT: lui a1, 267520 ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-ILP32FD-NEXT: addi a6, a0, %lo(.LCPI7_0) -; RV32-ILP32FD-NEXT: lui a1, %hi(.LCPI7_1) -; RV32-ILP32FD-NEXT: addi a1, a1, %lo(.LCPI7_1) -; RV32-ILP32FD-NEXT: lui a2, %hi(.LCPI7_2) -; RV32-ILP32FD-NEXT: addi a2, a2, %lo(.LCPI7_2) -; RV32-ILP32FD-NEXT: lui a3, %hi(.LCPI7_3) -; RV32-ILP32FD-NEXT: addi a3, a3, %lo(.LCPI7_3) -; RV32-ILP32FD-NEXT: lui a4, %hi(.LCPI7_4) -; RV32-ILP32FD-NEXT: addi a4, a4, %lo(.LCPI7_4) -; RV32-ILP32FD-NEXT: lui a5, %hi(.LCPI7_5) -; RV32-ILP32FD-NEXT: addi a5, a5, %lo(.LCPI7_5) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_0) +; RV32-ILP32FD-NEXT: flw fa0, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_1) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_1) +; RV32-ILP32FD-NEXT: flw fa1, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_2) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_2) +; RV32-ILP32FD-NEXT: flw fa2, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_3) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_3) +; RV32-ILP32FD-NEXT: flw fa3, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_4) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_4) +; RV32-ILP32FD-NEXT: flw fa4, 0(a0) +; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_5) +; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_5) +; RV32-ILP32FD-NEXT: flw fa5, 0(a0) ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_6) ; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_6) -; RV32-ILP32FD-NEXT: flw fa0, 0(a0) -; RV32-ILP32FD-NEXT: flw fa1, 0(a5) -; RV32-ILP32FD-NEXT: flw fa2, 0(a4) -; RV32-ILP32FD-NEXT: flw fa3, 0(a3) -; RV32-ILP32FD-NEXT: flw fa4, 0(a2) -; RV32-ILP32FD-NEXT: flw fa5, 0(a1) -; RV32-ILP32FD-NEXT: flw fa6, 0(a6) +; RV32-ILP32FD-NEXT: flw fa6, 0(a0) ; RV32-ILP32FD-NEXT: lui a0, %hi(.LCPI7_7) ; RV32-ILP32FD-NEXT: addi a0, a0, %lo(.LCPI7_7) ; RV32-ILP32FD-NEXT: flw fa7, 0(a0) ; RV32-ILP32FD-NEXT: addi a0, zero, 1 -; RV32-ILP32FD-NEXT: mv a1, zero ; RV32-ILP32FD-NEXT: addi a2, zero, 3 -; RV32-ILP32FD-NEXT: mv a3, zero ; RV32-ILP32FD-NEXT: addi a4, zero, 5 -; RV32-ILP32FD-NEXT: mv a5, zero ; RV32-ILP32FD-NEXT: addi a6, zero, 7 +; RV32-ILP32FD-NEXT: sw a1, 0(sp) +; RV32-ILP32FD-NEXT: mv a1, zero +; RV32-ILP32FD-NEXT: mv a3, zero +; RV32-ILP32FD-NEXT: mv a5, zero ; RV32-ILP32FD-NEXT: mv a7, zero ; RV32-ILP32FD-NEXT: call callee_float_on_stack_exhausted_gprs_fprs ; RV32-ILP32FD-NEXT: lw ra, 12(sp) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -48,22 +48,22 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i128 %d, i32 %e, i32 %f, i128 %g, i32 %h) nounwind { ; RV64I-LABEL: callee_many_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: ld t0, 0(sp) -; RV64I-NEXT: xor a4, a4, t0 -; RV64I-NEXT: xor a3, a3, a7 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addiw a4, a4, -1 -; RV64I-NEXT: and a1, a1, a4 -; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lw t0, 8(sp) +; RV64I-NEXT: ld t1, 0(sp) +; RV64I-NEXT: andi t2, a0, 255 +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: add a0, t2, a0 ; RV64I-NEXT: add a0, a0, a2 -; RV64I-NEXT: seqz a1, a3 +; RV64I-NEXT: xor a1, a4, t1 +; RV64I-NEXT: xor a2, a3, a7 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: seqz a1, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: add a0, a0, a5 ; RV64I-NEXT: add a0, a0, a6 -; RV64I-NEXT: lw a1, 8(sp) -; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: addw a0, a0, t0 ; RV64I-NEXT: ret %a_ext = zext i8 %a to i32 %b_ext = zext i16 %b to i32 @@ -85,15 +85,15 @@ ; RV64I-NEXT: sd ra, 24(sp) ; RV64I-NEXT: addi a0, zero, 8 ; RV64I-NEXT: sd a0, 8(sp) -; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: addi a0, zero, 1 ; RV64I-NEXT: addi a1, zero, 2 ; RV64I-NEXT: addi a2, zero, 3 ; RV64I-NEXT: addi a3, zero, 4 -; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: addi a5, zero, 5 ; RV64I-NEXT: addi a6, zero, 6 ; RV64I-NEXT: addi a7, zero, 7 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call callee_many_scalars ; RV64I-NEXT: ld ra, 24(sp) ; RV64I-NEXT: addi sp, sp, 32 @@ -107,20 +107,20 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-LABEL: callee_large_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a2, 24(a1) -; RV64I-NEXT: ld a3, 24(a0) -; RV64I-NEXT: xor a2, a3, a2 -; RV64I-NEXT: ld a3, 8(a1) -; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: xor a3, a4, a3 +; RV64I-NEXT: ld a6, 0(a1) +; RV64I-NEXT: ld a7, 0(a0) +; RV64I-NEXT: ld a4, 8(a1) +; RV64I-NEXT: ld a5, 24(a1) +; RV64I-NEXT: ld a2, 24(a0) +; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a1, 16(a1) +; RV64I-NEXT: ld a0, 16(a0) +; RV64I-NEXT: xor a2, a2, a5 +; RV64I-NEXT: xor a3, a3, a4 ; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: ld a3, 16(a1) -; RV64I-NEXT: ld a4, 16(a0) -; RV64I-NEXT: xor a3, a4, a3 -; RV64I-NEXT: ld a1, 0(a1) -; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: xor a1, a7, a6 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret @@ -134,18 +134,18 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) -; RV64I-NEXT: addi a0, zero, 2 -; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: addi a0, zero, 2 +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd zero, 48(sp) ; RV64I-NEXT: sd zero, 40(sp) -; RV64I-NEXT: addi a0, zero, 1 -; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: addi a2, zero, 1 ; RV64I-NEXT: addi a0, sp, 32 ; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: sd a2, 32(sp) ; RV64I-NEXT: call callee_large_scalars ; RV64I-NEXT: ld ra, 72(sp) ; RV64I-NEXT: addi sp, sp, 80 @@ -162,20 +162,20 @@ ; RV64I-LABEL: callee_large_scalars_exhausted_regs: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 8(sp) -; RV64I-NEXT: ld a1, 24(a0) -; RV64I-NEXT: ld a2, 24(a7) -; RV64I-NEXT: xor a1, a2, a1 -; RV64I-NEXT: ld a2, 8(a0) -; RV64I-NEXT: ld a3, 8(a7) -; RV64I-NEXT: xor a2, a3, a2 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: ld a2, 16(a0) -; RV64I-NEXT: ld a3, 16(a7) -; RV64I-NEXT: xor a2, a3, a2 -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: ld a3, 0(a7) -; RV64I-NEXT: xor a0, a3, a0 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ld a6, 0(a0) +; RV64I-NEXT: ld t0, 0(a7) +; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a4, 24(a0) +; RV64I-NEXT: ld a5, 24(a7) +; RV64I-NEXT: ld a1, 8(a7) +; RV64I-NEXT: ld a0, 16(a0) +; RV64I-NEXT: ld a2, 16(a7) +; RV64I-NEXT: xor a4, a5, a4 +; RV64I-NEXT: xor a1, a1, a3 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: xor a0, a2, a0 +; RV64I-NEXT: xor a2, t0, a6 +; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret @@ -193,16 +193,15 @@ ; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: addi a0, zero, 9 ; RV64I-NEXT: sd a0, 0(sp) -; RV64I-NEXT: addi a0, zero, 10 -; RV64I-NEXT: sd a0, 16(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 32(sp) ; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: addi a0, zero, 10 +; RV64I-NEXT: sd a0, 16(sp) ; RV64I-NEXT: sd zero, 72(sp) ; RV64I-NEXT: sd zero, 64(sp) ; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: addi a0, zero, 8 -; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: addi t0, zero, 8 ; RV64I-NEXT: addi a7, sp, 48 ; RV64I-NEXT: addi a0, zero, 1 ; RV64I-NEXT: addi a1, zero, 2 @@ -211,6 +210,7 @@ ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: addi a5, zero, 6 ; RV64I-NEXT: addi a6, zero, 7 +; RV64I-NEXT: sd t0, 48(sp) ; RV64I-NEXT: call callee_large_scalars_exhausted_regs ; RV64I-NEXT: ld ra, 88(sp) ; RV64I-NEXT: addi sp, sp, 96 @@ -277,9 +277,9 @@ define i64 @callee_large_struct(%struct.large* byval align 8 %a) nounwind { ; RV64I-LABEL: callee_large_struct: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a1, 24(a0) -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ld a1, 0(a0) +; RV64I-NEXT: ld a0, 24(a0) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %1 = getelementptr inbounds %struct.large, %struct.large* %a, i64 0, i32 0 %2 = getelementptr inbounds %struct.large, %struct.large* %a, i64 0, i32 3 @@ -296,16 +296,16 @@ ; RV64I-NEXT: sd ra, 72(sp) ; RV64I-NEXT: addi a0, zero, 1 ; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: addi a1, zero, 2 +; RV64I-NEXT: sd a1, 48(sp) +; RV64I-NEXT: addi a2, zero, 3 +; RV64I-NEXT: sd a2, 56(sp) +; RV64I-NEXT: addi a3, zero, 4 +; RV64I-NEXT: sd a3, 64(sp) ; RV64I-NEXT: sd a0, 8(sp) -; RV64I-NEXT: addi a0, zero, 2 -; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: sd a0, 16(sp) -; RV64I-NEXT: addi a0, zero, 3 -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: addi a0, zero, 4 -; RV64I-NEXT: sd a0, 64(sp) -; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: sd a1, 16(sp) +; RV64I-NEXT: sd a2, 24(sp) +; RV64I-NEXT: sd a3, 32(sp) ; RV64I-NEXT: addi a0, sp, 8 ; RV64I-NEXT: call callee_large_struct ; RV64I-NEXT: ld ra, 72(sp) @@ -332,15 +332,15 @@ ; should only be 8-byte aligned ; RV64I-LABEL: callee_aligned_stack: ; RV64I: # %bb.0: -; RV64I-NEXT: add a0, a5, a7 +; RV64I-NEXT: ld a0, 40(sp) ; RV64I-NEXT: ld a1, 0(sp) -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ld a1, 16(sp) -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ld a1, 32(sp) -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ld a1, 40(sp) -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ld a2, 16(sp) +; RV64I-NEXT: ld a3, 32(sp) +; RV64I-NEXT: add a4, a5, a7 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %f_trunc = trunc i128 %f to i64 %1 = add i64 %f_trunc, %g @@ -366,19 +366,19 @@ ; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: addi a0, zero, 10 ; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: addi a0, zero, 9 ; RV64I-NEXT: sd a0, 16(sp) -; RV64I-NEXT: addi a0, zero, 8 -; RV64I-NEXT: sd a0, 0(sp) -; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: addi a6, zero, 8 ; RV64I-NEXT: addi a0, zero, 1 ; RV64I-NEXT: addi a1, zero, 2 ; RV64I-NEXT: addi a2, zero, 3 ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: addi a5, zero, 6 -; RV64I-NEXT: mv a6, zero ; RV64I-NEXT: addi a7, zero, 7 +; RV64I-NEXT: sd a6, 0(sp) +; RV64I-NEXT: mv a6, zero ; RV64I-NEXT: call callee_aligned_stack ; RV64I-NEXT: ld ra, 56(sp) ; RV64I-NEXT: addi sp, sp, 64 @@ -482,18 +482,18 @@ define void @callee_large_struct_ret(%struct.large* noalias sret %agg.result) nounwind { ; RV64I-LABEL: callee_large_struct_ret: ; RV64I: # %bb.0: -; RV64I-NEXT: addi a1, zero, 4 -; RV64I-NEXT: sw a1, 24(a0) -; RV64I-NEXT: addi a1, zero, 3 -; RV64I-NEXT: sw a1, 16(a0) -; RV64I-NEXT: addi a1, zero, 2 -; RV64I-NEXT: sw a1, 8(a0) -; RV64I-NEXT: sw zero, 28(a0) -; RV64I-NEXT: sw zero, 20(a0) -; RV64I-NEXT: sw zero, 12(a0) ; RV64I-NEXT: sw zero, 4(a0) ; RV64I-NEXT: addi a1, zero, 1 ; RV64I-NEXT: sw a1, 0(a0) +; RV64I-NEXT: sw zero, 12(a0) +; RV64I-NEXT: addi a1, zero, 2 +; RV64I-NEXT: sw a1, 8(a0) +; RV64I-NEXT: sw zero, 20(a0) +; RV64I-NEXT: addi a1, zero, 3 +; RV64I-NEXT: sw a1, 16(a0) +; RV64I-NEXT: sw zero, 28(a0) +; RV64I-NEXT: addi a1, zero, 4 +; RV64I-NEXT: sw a1, 24(a0) ; RV64I-NEXT: ret %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i64 0, i32 0 store i64 1, i64* %a, align 4 @@ -513,9 +513,9 @@ ; RV64I-NEXT: sd ra, 40(sp) ; RV64I-NEXT: addi a0, sp, 8 ; RV64I-NEXT: call callee_large_struct_ret -; RV64I-NEXT: ld a0, 32(sp) -; RV64I-NEXT: ld a1, 8(sp) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ld a0, 8(sp) +; RV64I-NEXT: ld a1, 32(sp) +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ld ra, 40(sp) ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll @@ -108,15 +108,15 @@ ; RV64I-FPELIM: # %bb.0: ; RV64I-FPELIM-NEXT: addi sp, sp, -16 ; RV64I-FPELIM-NEXT: sd ra, 8(sp) -; RV64I-FPELIM-NEXT: lui a0, 264704 -; RV64I-FPELIM-NEXT: sd a0, 0(sp) +; RV64I-FPELIM-NEXT: lui a1, 264704 ; RV64I-FPELIM-NEXT: addi a0, zero, 1 -; RV64I-FPELIM-NEXT: mv a1, zero ; RV64I-FPELIM-NEXT: addi a2, zero, 2 -; RV64I-FPELIM-NEXT: mv a3, zero ; RV64I-FPELIM-NEXT: addi a4, zero, 3 -; RV64I-FPELIM-NEXT: mv a5, zero ; RV64I-FPELIM-NEXT: addi a6, zero, 4 +; RV64I-FPELIM-NEXT: sd a1, 0(sp) +; RV64I-FPELIM-NEXT: mv a1, zero +; RV64I-FPELIM-NEXT: mv a3, zero +; RV64I-FPELIM-NEXT: mv a5, zero ; RV64I-FPELIM-NEXT: mv a7, zero ; RV64I-FPELIM-NEXT: call callee_float_on_stack ; RV64I-FPELIM-NEXT: ld ra, 8(sp) @@ -129,15 +129,15 @@ ; RV64I-WITHFP-NEXT: sd ra, 24(sp) ; RV64I-WITHFP-NEXT: sd s0, 16(sp) ; RV64I-WITHFP-NEXT: addi s0, sp, 32 -; RV64I-WITHFP-NEXT: lui a0, 264704 -; RV64I-WITHFP-NEXT: sd a0, 0(sp) +; RV64I-WITHFP-NEXT: lui a1, 264704 ; RV64I-WITHFP-NEXT: addi a0, zero, 1 -; RV64I-WITHFP-NEXT: mv a1, zero ; RV64I-WITHFP-NEXT: addi a2, zero, 2 -; RV64I-WITHFP-NEXT: mv a3, zero ; RV64I-WITHFP-NEXT: addi a4, zero, 3 -; RV64I-WITHFP-NEXT: mv a5, zero ; RV64I-WITHFP-NEXT: addi a6, zero, 4 +; RV64I-WITHFP-NEXT: sd a1, 0(sp) +; RV64I-WITHFP-NEXT: mv a1, zero +; RV64I-WITHFP-NEXT: mv a3, zero +; RV64I-WITHFP-NEXT: mv a5, zero ; RV64I-WITHFP-NEXT: mv a7, zero ; RV64I-WITHFP-NEXT: call callee_float_on_stack ; RV64I-WITHFP-NEXT: ld s0, 16(sp) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -33,15 +33,15 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) ; RV32IF-NEXT: sw a0, 4(sp) -; RV32IF-NEXT: lui a0, 264704 -; RV32IF-NEXT: sw a0, 0(sp) +; RV32IF-NEXT: lui a1, 264704 ; RV32IF-NEXT: addi a0, zero, 1 -; RV32IF-NEXT: mv a1, zero ; RV32IF-NEXT: addi a2, zero, 2 -; RV32IF-NEXT: mv a3, zero ; RV32IF-NEXT: addi a4, zero, 3 -; RV32IF-NEXT: mv a5, zero ; RV32IF-NEXT: addi a6, zero, 4 +; RV32IF-NEXT: sw a1, 0(sp) +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: mv a3, zero +; RV32IF-NEXT: mv a5, zero ; RV32IF-NEXT: mv a7, zero ; RV32IF-NEXT: call onstack_f32_noop ; RV32IF-NEXT: lw ra, 12(sp) @@ -56,19 +56,19 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: fsub.s ft2, ft1, ft0 -; RV32IF-NEXT: fsw ft2, 4(sp) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 -; RV32IF-NEXT: fsw ft0, 0(sp) +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fadd.s ft2, ft1, ft0 +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: fsw ft0, 4(sp) ; RV32IF-NEXT: addi a0, zero, 1 -; RV32IF-NEXT: mv a1, zero ; RV32IF-NEXT: addi a2, zero, 2 -; RV32IF-NEXT: mv a3, zero ; RV32IF-NEXT: addi a4, zero, 3 -; RV32IF-NEXT: mv a5, zero ; RV32IF-NEXT: addi a6, zero, 4 +; RV32IF-NEXT: fsw ft2, 0(sp) +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: mv a3, zero +; RV32IF-NEXT: mv a5, zero ; RV32IF-NEXT: mv a7, zero ; RV32IF-NEXT: call onstack_f32_noop ; RV32IF-NEXT: lw ra, 12(sp) diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -61,9 +61,9 @@ ; RV32I-SMALL-NEXT: sw ra, 12(sp) ; RV32I-SMALL-NEXT: lui a1, %hi(.Ltmp0) ; RV32I-SMALL-NEXT: addi a1, a1, %lo(.Ltmp0) +; RV32I-SMALL-NEXT: addi a2, zero, 101 ; RV32I-SMALL-NEXT: sw a1, 8(sp) -; RV32I-SMALL-NEXT: addi a1, zero, 101 -; RV32I-SMALL-NEXT: blt a0, a1, .LBB2_3 +; RV32I-SMALL-NEXT: blt a0, a2, .LBB2_3 ; RV32I-SMALL-NEXT: # %bb.1: # %if.then ; RV32I-SMALL-NEXT: lw a0, 8(sp) ; RV32I-SMALL-NEXT: jr a0 @@ -86,9 +86,9 @@ ; RV32I-MEDIUM-NEXT: # Label of block must be emitted ; RV32I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.Ltmp0) ; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.LBB2_5) +; RV32I-MEDIUM-NEXT: addi a2, zero, 101 ; RV32I-MEDIUM-NEXT: sw a1, 8(sp) -; RV32I-MEDIUM-NEXT: addi a1, zero, 101 -; RV32I-MEDIUM-NEXT: blt a0, a1, .LBB2_3 +; RV32I-MEDIUM-NEXT: blt a0, a2, .LBB2_3 ; RV32I-MEDIUM-NEXT: # %bb.1: # %if.then ; RV32I-MEDIUM-NEXT: lw a0, 8(sp) ; RV32I-MEDIUM-NEXT: jr a0 @@ -131,11 +131,11 @@ define float @lower_constantpool(float %a) nounwind { ; RV32I-SMALL-LABEL: lower_constantpool: ; RV32I-SMALL: # %bb.0: -; RV32I-SMALL-NEXT: fmv.w.x ft0, a0 -; RV32I-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-SMALL-NEXT: addi a0, a0, %lo(.LCPI3_0) -; RV32I-SMALL-NEXT: flw ft1, 0(a0) -; RV32I-SMALL-NEXT: fadd.s ft0, ft0, ft1 +; RV32I-SMALL-NEXT: lui a1, %hi(.LCPI3_0) +; RV32I-SMALL-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV32I-SMALL-NEXT: flw ft0, 0(a1) +; RV32I-SMALL-NEXT: fmv.w.x ft1, a0 +; RV32I-SMALL-NEXT: fadd.s ft0, ft1, ft0 ; RV32I-SMALL-NEXT: fmv.x.w a0, ft0 ; RV32I-SMALL-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/compress.ll b/llvm/test/CodeGen/RISCV/compress.ll --- a/llvm/test/CodeGen/RISCV/compress.ll +++ b/llvm/test/CodeGen/RISCV/compress.ll @@ -19,10 +19,10 @@ define i32 @simple_arith(i32 %a, i32 %b) nounwind { ; RV32IC-LABEL: simple_arith: -; RV32IC: c.srai a1, 9 -; RV32IC-NEXT: addi a2, a0, 1 +; RV32IC: addi a2, a0, 1 ; RV32IC-NEXT: c.andi a2, 11 ; RV32IC-NEXT: c.slli a2, 7 +; RV32IC-NEXT: c.srai a1, 9 ; RV32IC-NEXT: c.add a1, a2 ; RV32IC-NEXT: sub a0, a1, a0 ; RV32IC-NEXT: c.jr ra diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -457,8 +457,8 @@ ; ; RV64IM-LABEL: sdiv64_sext_operands: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a1, a1 ; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: sext.w a1, a1 ; RV64IM-NEXT: div a0, a0, a1 ; RV64IM-NEXT: ret %1 = sext i32 %a to i64 diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -473,13 +473,13 @@ ; ; RV64IFD-LABEL: fmsub_d: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a2 -; RV64IFD-NEXT: lui a2, %hi(.LCPI15_0) -; RV64IFD-NEXT: addi a2, a2, %lo(.LCPI15_0) -; RV64IFD-NEXT: fld ft1, 0(a2) -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: lui a3, %hi(.LCPI15_0) +; RV64IFD-NEXT: addi a3, a3, %lo(.LCPI15_0) +; RV64IFD-NEXT: fld ft0, 0(a3) ; RV64IFD-NEXT: fmv.d.x ft1, a1 ; RV64IFD-NEXT: fmv.d.x ft2, a0 +; RV64IFD-NEXT: fmv.d.x ft3, a2 +; RV64IFD-NEXT: fadd.d ft0, ft3, ft0 ; RV64IFD-NEXT: fmsub.d ft0, ft2, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret @@ -496,18 +496,18 @@ ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) ; RV32IFD-NEXT: fld ft0, 8(sp) -; RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft1, 8(sp) ; RV32IFD-NEXT: sw a4, 8(sp) ; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft2, 8(sp) ; RV32IFD-NEXT: lui a0, %hi(.LCPI16_0) ; RV32IFD-NEXT: addi a0, a0, %lo(.LCPI16_0) ; RV32IFD-NEXT: fld ft3, 0(a0) ; RV32IFD-NEXT: fadd.d ft2, ft2, ft3 ; RV32IFD-NEXT: fadd.d ft1, ft1, ft3 -; RV32IFD-NEXT: fnmadd.d ft0, ft1, ft0, ft2 +; RV32IFD-NEXT: fnmadd.d ft0, ft2, ft0, ft1 ; RV32IFD-NEXT: fsd ft0, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) @@ -516,15 +516,15 @@ ; ; RV64IFD-LABEL: fnmadd_d: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a2 -; RV64IFD-NEXT: lui a2, %hi(.LCPI16_0) -; RV64IFD-NEXT: addi a2, a2, %lo(.LCPI16_0) -; RV64IFD-NEXT: fld ft1, 0(a2) -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 -; RV64IFD-NEXT: fmv.d.x ft2, a0 -; RV64IFD-NEXT: fadd.d ft1, ft2, ft1 -; RV64IFD-NEXT: fmv.d.x ft2, a1 -; RV64IFD-NEXT: fnmadd.d ft0, ft1, ft2, ft0 +; RV64IFD-NEXT: lui a3, %hi(.LCPI16_0) +; RV64IFD-NEXT: addi a3, a3, %lo(.LCPI16_0) +; RV64IFD-NEXT: fld ft0, 0(a3) +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: fmv.d.x ft2, a2 +; RV64IFD-NEXT: fmv.d.x ft3, a0 +; RV64IFD-NEXT: fadd.d ft3, ft3, ft0 +; RV64IFD-NEXT: fadd.d ft0, ft2, ft0 +; RV64IFD-NEXT: fnmadd.d ft0, ft3, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret %a_ = fadd double 0.0, %a @@ -561,13 +561,13 @@ ; ; RV64IFD-LABEL: fnmsub_d: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: lui a0, %hi(.LCPI17_0) -; RV64IFD-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV64IFD-NEXT: fld ft1, 0(a0) -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: lui a3, %hi(.LCPI17_0) +; RV64IFD-NEXT: addi a3, a3, %lo(.LCPI17_0) +; RV64IFD-NEXT: fld ft0, 0(a3) ; RV64IFD-NEXT: fmv.d.x ft1, a2 ; RV64IFD-NEXT: fmv.d.x ft2, a1 +; RV64IFD-NEXT: fmv.d.x ft3, a0 +; RV64IFD-NEXT: fadd.d ft0, ft3, ft0 ; RV64IFD-NEXT: fnmsub.d ft0, ft0, ft2, ft1 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll --- a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll @@ -118,9 +118,9 @@ ; ; RV64I-LABEL: fcopysign_fneg: ; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 ; RV64I-NEXT: addi a2, zero, -1 ; RV64I-NEXT: slli a2, a2, 63 -; RV64I-NEXT: not a1, a1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: addi a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll @@ -385,11 +385,11 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft0 -; RV64IFD-NEXT: and a0, a0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: feq.d a0, ft1, ft1 +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: bnez a0, .LBB8_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: ld ra, 8(sp) @@ -712,11 +712,11 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft0 -; RV64IFD-NEXT: and a0, a0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: feq.d a0, ft1, ft1 +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: seqz a0, a0 ; RV64IFD-NEXT: bnez a0, .LBB15_2 ; RV64IFD-NEXT: # %bb.1: # %if.else diff --git a/llvm/test/CodeGen/RISCV/double-calling-conv.ll b/llvm/test/CodeGen/RISCV/double-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/double-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-calling-conv.ll @@ -76,16 +76,16 @@ ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) ; RV32IFD-NEXT: lui a0, 262510 -; RV32IFD-NEXT: addi a0, a0, 327 -; RV32IFD-NEXT: sw a0, 0(sp) +; RV32IFD-NEXT: addi a2, a0, 327 ; RV32IFD-NEXT: lui a0, 262446 ; RV32IFD-NEXT: addi a6, a0, 327 ; RV32IFD-NEXT: lui a0, 713032 ; RV32IFD-NEXT: addi a5, a0, -1311 ; RV32IFD-NEXT: addi a0, zero, 1 ; RV32IFD-NEXT: addi a1, zero, 2 -; RV32IFD-NEXT: mv a2, zero ; RV32IFD-NEXT: addi a3, zero, 3 +; RV32IFD-NEXT: sw a2, 0(sp) +; RV32IFD-NEXT: mv a2, zero ; RV32IFD-NEXT: mv a4, zero ; RV32IFD-NEXT: mv a7, a5 ; RV32IFD-NEXT: call callee_double_split_reg_stack @@ -120,20 +120,20 @@ ; RV32IFD-NEXT: lui a0, 262510 ; RV32IFD-NEXT: addi a0, a0, 327 ; RV32IFD-NEXT: sw a0, 4(sp) +; RV32IFD-NEXT: lui a0, 713032 +; RV32IFD-NEXT: addi a1, a0, -1311 +; RV32IFD-NEXT: sw a1, 0(sp) ; RV32IFD-NEXT: lui a0, 262574 ; RV32IFD-NEXT: addi a0, a0, 327 ; RV32IFD-NEXT: sw a0, 12(sp) -; RV32IFD-NEXT: lui a0, 713032 -; RV32IFD-NEXT: addi a0, a0, -1311 -; RV32IFD-NEXT: sw a0, 0(sp) -; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: addi a0, zero, 1 -; RV32IFD-NEXT: mv a1, zero ; RV32IFD-NEXT: addi a2, zero, 2 -; RV32IFD-NEXT: mv a3, zero ; RV32IFD-NEXT: addi a4, zero, 3 -; RV32IFD-NEXT: mv a5, zero ; RV32IFD-NEXT: addi a6, zero, 4 +; RV32IFD-NEXT: sw a1, 8(sp) +; RV32IFD-NEXT: mv a1, zero +; RV32IFD-NEXT: mv a3, zero +; RV32IFD-NEXT: mv a5, zero ; RV32IFD-NEXT: mv a7, zero ; RV32IFD-NEXT: call callee_double_stack ; RV32IFD-NEXT: lw ra, 28(sp) diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -257,9 +257,9 @@ ; ; RV64IFD-LABEL: fmv_d_x: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret %1 = bitcast i64 %a to double diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll @@ -197,11 +197,11 @@ ; ; RV64IFD-LABEL: fcmp_ord: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft0 -; RV64IFD-NEXT: and a0, a0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: feq.d a0, ft1, ft1 +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: ret %1 = fcmp ord double %a, %b %2 = zext i1 %1 to i32 @@ -397,11 +397,11 @@ ; ; RV64IFD-LABEL: fcmp_uno: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft0 -; RV64IFD-NEXT: and a0, a0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: feq.d a0, ft1, ft1 +; RV64IFD-NEXT: feq.d a1, ft0, ft0 +; RV64IFD-NEXT: and a0, a1, a0 ; RV64IFD-NEXT: seqz a0, a0 ; RV64IFD-NEXT: ret %1 = fcmp uno double %a, %b diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -49,11 +49,11 @@ ; ; RV64IFD-LABEL: double_imm_op: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IFD-NEXT: addi a0, a0, %lo(.LCPI1_0) -; RV64IFD-NEXT: fld ft1, 0(a0) -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: lui a1, %hi(.LCPI1_0) +; RV64IFD-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV64IFD-NEXT: fld ft0, 0(a1) +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret %1 = fadd double %a, 1.0 diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -111,28 +111,24 @@ ; RV32IFD-NEXT: sw ra, 28(sp) ; RV32IFD-NEXT: sw s0, 24(sp) ; RV32IFD-NEXT: sw s1, 20(sp) -; RV32IFD-NEXT: sw s2, 16(sp) -; RV32IFD-NEXT: sw s3, 12(sp) ; RV32IFD-NEXT: mv s0, a1 ; RV32IFD-NEXT: mv s1, a0 ; RV32IFD-NEXT: call sin -; RV32IFD-NEXT: mv s2, a0 -; RV32IFD-NEXT: mv s3, a1 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fsd ft0, 0(sp) ; RV32IFD-NEXT: mv a0, s1 ; RV32IFD-NEXT: mv a1, s0 ; RV32IFD-NEXT: call cos -; RV32IFD-NEXT: sw a0, 0(sp) -; RV32IFD-NEXT: sw a1, 4(sp) -; RV32IFD-NEXT: fld ft0, 0(sp) -; RV32IFD-NEXT: sw s2, 0(sp) -; RV32IFD-NEXT: sw s3, 4(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: fld ft1, 0(sp) ; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 -; RV32IFD-NEXT: fsd ft0, 0(sp) -; RV32IFD-NEXT: lw a0, 0(sp) -; RV32IFD-NEXT: lw a1, 4(sp) -; RV32IFD-NEXT: lw s3, 12(sp) -; RV32IFD-NEXT: lw s2, 16(sp) +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw s1, 20(sp) ; RV32IFD-NEXT: lw s0, 24(sp) ; RV32IFD-NEXT: lw ra, 28(sp) @@ -144,17 +140,16 @@ ; RV64IFD-NEXT: addi sp, sp, -32 ; RV64IFD-NEXT: sd ra, 24(sp) ; RV64IFD-NEXT: sd s0, 16(sp) -; RV64IFD-NEXT: sd s1, 8(sp) ; RV64IFD-NEXT: mv s0, a0 ; RV64IFD-NEXT: call sin -; RV64IFD-NEXT: mv s1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fsd ft0, 8(sp) ; RV64IFD-NEXT: mv a0, s0 ; RV64IFD-NEXT: call cos ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, s1 +; RV64IFD-NEXT: fld ft1, 8(sp) ; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 -; RV64IFD-NEXT: ld s1, 8(sp) ; RV64IFD-NEXT: ld s0, 16(sp) ; RV64IFD-NEXT: ld ra, 24(sp) ; RV64IFD-NEXT: addi sp, sp, 32 @@ -350,17 +345,17 @@ ; RV32IFD-LABEL: fmuladd_f64: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: fmul.d ft0, ft1, ft0 -; RV32IFD-NEXT: sw a4, 8(sp) -; RV32IFD-NEXT: sw a5, 12(sp) -; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: fmul.d ft1, ft2, ft1 +; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 ; RV32IFD-NEXT: fsd ft0, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) @@ -369,11 +364,11 @@ ; ; RV64IFD-LABEL: fmuladd_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a1 -; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: fmul.d ft0, ft1, ft0 -; RV64IFD-NEXT: fmv.d.x ft1, a2 -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: fmv.d.x ft0, a2 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: fmv.d.x ft2, a0 +; RV64IFD-NEXT: fmul.d ft1, ft2, ft1 +; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll --- a/llvm/test/CodeGen/RISCV/double-mem.ll +++ b/llvm/test/CodeGen/RISCV/double-mem.ll @@ -8,9 +8,9 @@ ; RV32IFD-LABEL: fld: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: fld ft0, 24(a0) -; RV32IFD-NEXT: fld ft1, 0(a0) -; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV32IFD-NEXT: fld ft0, 0(a0) +; RV32IFD-NEXT: fld ft1, 24(a0) +; RV32IFD-NEXT: fadd.d ft0, ft0, ft1 ; RV32IFD-NEXT: fsd ft0, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) @@ -19,9 +19,9 @@ ; ; RV64IFD-LABEL: fld: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fld ft0, 24(a0) -; RV64IFD-NEXT: fld ft1, 0(a0) -; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV64IFD-NEXT: fld ft0, 0(a0) +; RV64IFD-NEXT: fld ft1, 24(a0) +; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 ; RV64IFD-NEXT: fmv.x.d a0, ft0 ; RV64IFD-NEXT: ret %1 = load double, double* %a @@ -44,8 +44,8 @@ ; RV32IFD-NEXT: sw a2, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) ; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 -; RV32IFD-NEXT: fsd ft0, 64(a0) ; RV32IFD-NEXT: fsd ft0, 0(a0) +; RV32IFD-NEXT: fsd ft0, 64(a0) ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -54,8 +54,8 @@ ; RV64IFD-NEXT: fmv.d.x ft0, a2 ; RV64IFD-NEXT: fmv.d.x ft1, a1 ; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 -; RV64IFD-NEXT: fsd ft0, 64(a0) ; RV64IFD-NEXT: fsd ft0, 0(a0) +; RV64IFD-NEXT: fsd ft0, 64(a0) ; RV64IFD-NEXT: ret ; Use %b and %c in an FP op to ensure floating point registers are used, even ; for the soft float ABI @@ -100,10 +100,10 @@ ; RV64IFD-NEXT: lui a0, %hi(G) ; RV64IFD-NEXT: fld ft1, %lo(G)(a0) ; RV64IFD-NEXT: fsd ft0, %lo(G)(a0) -; RV64IFD-NEXT: addi a0, a0, %lo(G) -; RV64IFD-NEXT: fld ft1, 72(a0) -; RV64IFD-NEXT: fsd ft0, 72(a0) +; RV64IFD-NEXT: addi a1, a0, %lo(G) +; RV64IFD-NEXT: fld ft1, 72(a1) ; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: fsd ft0, 72(a1) ; RV64IFD-NEXT: ret ; Use %a and %b in an FP op to ensure floating point registers are used, even ; for the soft float ABI @@ -136,14 +136,14 @@ ; ; RV64IFD-LABEL: fld_fsd_constant: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: lui a0, 56 -; RV64IFD-NEXT: addiw a0, a0, -1353 -; RV64IFD-NEXT: slli a0, a0, 14 -; RV64IFD-NEXT: fld ft1, -273(a0) -; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 -; RV64IFD-NEXT: fsd ft0, -273(a0) +; RV64IFD-NEXT: lui a1, 56 +; RV64IFD-NEXT: addiw a1, a1, -1353 +; RV64IFD-NEXT: slli a1, a1, 14 +; RV64IFD-NEXT: fld ft0, -273(a1) +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 ; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: fsd ft0, -273(a1) ; RV64IFD-NEXT: ret %1 = inttoptr i32 3735928559 to double* %2 = load volatile double, double* %1 @@ -159,22 +159,18 @@ ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -32 ; RV32IFD-NEXT: sw ra, 28(sp) -; RV32IFD-NEXT: sw s0, 24(sp) -; RV32IFD-NEXT: sw s1, 20(sp) -; RV32IFD-NEXT: mv s0, a1 -; RV32IFD-NEXT: mv s1, a0 -; RV32IFD-NEXT: addi a0, sp, 8 -; RV32IFD-NEXT: call notdead -; RV32IFD-NEXT: sw s1, 0(sp) -; RV32IFD-NEXT: sw s0, 4(sp) -; RV32IFD-NEXT: fld ft0, 0(sp) -; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: fsd ft0, 0(sp) -; RV32IFD-NEXT: lw a0, 0(sp) -; RV32IFD-NEXT: lw a1, 4(sp) -; RV32IFD-NEXT: lw s1, 20(sp) -; RV32IFD-NEXT: lw s0, 24(sp) +; RV32IFD-NEXT: addi a0, sp, 16 +; RV32IFD-NEXT: call notdead +; RV32IFD-NEXT: fld ft0, 16(sp) +; RV32IFD-NEXT: fld ft1, 0(sp) +; RV32IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw ra, 28(sp) ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -183,15 +179,14 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addi sp, sp, -32 ; RV64IFD-NEXT: sd ra, 24(sp) -; RV64IFD-NEXT: sd s0, 16(sp) -; RV64IFD-NEXT: mv s0, a0 -; RV64IFD-NEXT: addi a0, sp, 8 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fsd ft0, 8(sp) +; RV64IFD-NEXT: addi a0, sp, 16 ; RV64IFD-NEXT: call notdead -; RV64IFD-NEXT: fmv.d.x ft0, s0 +; RV64IFD-NEXT: fld ft0, 16(sp) ; RV64IFD-NEXT: fld ft1, 8(sp) -; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 ; RV64IFD-NEXT: fmv.x.d a0, ft0 -; RV64IFD-NEXT: ld s0, 16(sp) ; RV64IFD-NEXT: ld ra, 24(sp) ; RV64IFD-NEXT: addi sp, sp, 32 ; RV64IFD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/double-previous-failure.ll b/llvm/test/CodeGen/RISCV/double-previous-failure.ll --- a/llvm/test/CodeGen/RISCV/double-previous-failure.ll +++ b/llvm/test/CodeGen/RISCV/double-previous-failure.ll @@ -17,8 +17,8 @@ ; RV32IFD: # %bb.0: # %entry ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) -; RV32IFD-NEXT: mv a0, zero ; RV32IFD-NEXT: lui a1, 262144 +; RV32IFD-NEXT: mv a0, zero ; RV32IFD-NEXT: call test ; RV32IFD-NEXT: sw a0, 0(sp) ; RV32IFD-NEXT: sw a1, 4(sp) diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll @@ -298,23 +298,23 @@ ; RV32IFD-LABEL: select_fcmp_ueq: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 +; RV32IFD-NEXT: feq.d a0, ft1, ft0 ; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: feq.d a2, ft1, ft1 +; RV32IFD-NEXT: and a1, a2, a1 +; RV32IFD-NEXT: seqz a1, a1 +; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: bnez a0, .LBB8_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: fmv.d ft0, ft1 +; RV32IFD-NEXT: fmv.d ft1, ft0 ; RV32IFD-NEXT: .LBB8_2: -; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: fsd ft1, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: addi sp, sp, 16 @@ -322,14 +322,14 @@ ; ; RV64IFD-LABEL: select_fcmp_ueq: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 ; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: seqz a0, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: feq.d a0, ft0, ft1 +; RV64IFD-NEXT: feq.d a1, ft1, ft1 +; RV64IFD-NEXT: feq.d a2, ft0, ft0 +; RV64IFD-NEXT: and a1, a2, a1 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: or a0, a0, a1 ; RV64IFD-NEXT: bnez a0, .LBB8_2 ; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fmv.d ft0, ft1 @@ -604,12 +604,12 @@ ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: bnez a0, .LBB16_2 +; RV32IFD-NEXT: feq.d a1, ft1, ft0 +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: bnez a1, .LBB16_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a4, a5 +; RV32IFD-NEXT: mv a0, a5 ; RV32IFD-NEXT: .LBB16_2: -; RV32IFD-NEXT: mv a0, a4 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -617,12 +617,12 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft0, a1 ; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: feq.d a0, ft1, ft0 -; RV64IFD-NEXT: bnez a0, .LBB16_2 +; RV64IFD-NEXT: feq.d a1, ft1, ft0 +; RV64IFD-NEXT: mv a0, a2 +; RV64IFD-NEXT: bnez a1, .LBB16_2 ; RV64IFD-NEXT: # %bb.1: -; RV64IFD-NEXT: mv a2, a3 +; RV64IFD-NEXT: mv a0, a3 ; RV64IFD-NEXT: .LBB16_2: -; RV64IFD-NEXT: mv a0, a2 ; RV64IFD-NEXT: ret %1 = fcmp oeq double %a, %b %2 = select i1 %1, i32 %c, i32 %d diff --git a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll --- a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll +++ b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll @@ -37,10 +37,10 @@ ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) +; RV64IFD-NEXT: slli a2, a1, 32 +; RV64IFD-NEXT: srli a2, a2, 32 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: slli a0, a1, 32 -; RV64IFD-NEXT: srli a0, a0, 32 -; RV64IFD-NEXT: beqz a0, .LBB0_2 +; RV64IFD-NEXT: beqz a2, .LBB0_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: addi a1, a1, -1 ; RV64IFD-NEXT: fmv.x.d a0, ft0 diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll --- a/llvm/test/CodeGen/RISCV/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/float-arith.ll @@ -339,26 +339,26 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind { ; RV32IF-LABEL: fmsub_s: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a2 -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: addi a2, a2, %lo(.LCPI15_0) -; RV32IF-NEXT: flw ft1, 0(a2) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IF-NEXT: addi a3, a3, %lo(.LCPI15_0) +; RV32IF-NEXT: flw ft0, 0(a3) ; RV32IF-NEXT: fmv.w.x ft1, a1 ; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmv.w.x ft3, a2 +; RV32IF-NEXT: fadd.s ft0, ft3, ft0 ; RV32IF-NEXT: fmsub.s ft0, ft2, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fmsub_s: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a2 -; RV64IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV64IF-NEXT: addi a2, a2, %lo(.LCPI15_0) -; RV64IF-NEXT: flw ft1, 0(a2) -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: lui a3, %hi(.LCPI15_0) +; RV64IF-NEXT: addi a3, a3, %lo(.LCPI15_0) +; RV64IF-NEXT: flw ft0, 0(a3) ; RV64IF-NEXT: fmv.w.x ft1, a1 ; RV64IF-NEXT: fmv.w.x ft2, a0 +; RV64IF-NEXT: fmv.w.x ft3, a2 +; RV64IF-NEXT: fadd.s ft0, ft3, ft0 ; RV64IF-NEXT: fmsub.s ft0, ft2, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret @@ -371,29 +371,29 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind { ; RV32IF-LABEL: fnmadd_s: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a2 -; RV32IF-NEXT: lui a2, %hi(.LCPI16_0) -; RV32IF-NEXT: addi a2, a2, %lo(.LCPI16_0) -; RV32IF-NEXT: flw ft1, 0(a2) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 -; RV32IF-NEXT: fmv.w.x ft2, a0 -; RV32IF-NEXT: fadd.s ft1, ft2, ft1 -; RV32IF-NEXT: fmv.w.x ft2, a1 -; RV32IF-NEXT: fnmadd.s ft0, ft1, ft2, ft0 +; RV32IF-NEXT: lui a3, %hi(.LCPI16_0) +; RV32IF-NEXT: addi a3, a3, %lo(.LCPI16_0) +; RV32IF-NEXT: flw ft0, 0(a3) +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a2 +; RV32IF-NEXT: fmv.w.x ft3, a0 +; RV32IF-NEXT: fadd.s ft3, ft3, ft0 +; RV32IF-NEXT: fadd.s ft0, ft2, ft0 +; RV32IF-NEXT: fnmadd.s ft0, ft3, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fnmadd_s: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a2 -; RV64IF-NEXT: lui a2, %hi(.LCPI16_0) -; RV64IF-NEXT: addi a2, a2, %lo(.LCPI16_0) -; RV64IF-NEXT: flw ft1, 0(a2) -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 -; RV64IF-NEXT: fmv.w.x ft2, a0 -; RV64IF-NEXT: fadd.s ft1, ft2, ft1 -; RV64IF-NEXT: fmv.w.x ft2, a1 -; RV64IF-NEXT: fnmadd.s ft0, ft1, ft2, ft0 +; RV64IF-NEXT: lui a3, %hi(.LCPI16_0) +; RV64IF-NEXT: addi a3, a3, %lo(.LCPI16_0) +; RV64IF-NEXT: flw ft0, 0(a3) +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fmv.w.x ft2, a2 +; RV64IF-NEXT: fmv.w.x ft3, a0 +; RV64IF-NEXT: fadd.s ft3, ft3, ft0 +; RV64IF-NEXT: fadd.s ft0, ft2, ft0 +; RV64IF-NEXT: fnmadd.s ft0, ft3, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret %a_ = fadd float 0.0, %a @@ -407,26 +407,26 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; RV32IF-LABEL: fnmsub_s: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) -; RV32IF-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV32IF-NEXT: flw ft1, 0(a0) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: lui a3, %hi(.LCPI17_0) +; RV32IF-NEXT: addi a3, a3, %lo(.LCPI17_0) +; RV32IF-NEXT: flw ft0, 0(a3) ; RV32IF-NEXT: fmv.w.x ft1, a2 ; RV32IF-NEXT: fmv.w.x ft2, a1 +; RV32IF-NEXT: fmv.w.x ft3, a0 +; RV32IF-NEXT: fadd.s ft0, ft3, ft0 ; RV32IF-NEXT: fnmsub.s ft0, ft0, ft2, ft1 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fnmsub_s: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: lui a0, %hi(.LCPI17_0) -; RV64IF-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV64IF-NEXT: flw ft1, 0(a0) -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: lui a3, %hi(.LCPI17_0) +; RV64IF-NEXT: addi a3, a3, %lo(.LCPI17_0) +; RV64IF-NEXT: flw ft0, 0(a3) ; RV64IF-NEXT: fmv.w.x ft1, a2 ; RV64IF-NEXT: fmv.w.x ft2, a1 +; RV64IF-NEXT: fmv.w.x ft3, a0 +; RV64IF-NEXT: fadd.s ft0, ft3, ft0 ; RV64IF-NEXT: fnmsub.s ft0, ft0, ft2, ft1 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll @@ -337,11 +337,11 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft0 -; RV32IF-NEXT: and a0, a0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: feq.s a0, ft1, ft1 +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: bnez a0, .LBB8_2 ; RV32IF-NEXT: # %bb.1: # %if.else ; RV32IF-NEXT: lw ra, 12(sp) @@ -354,11 +354,11 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft0 -; RV64IF-NEXT: and a0, a0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: feq.s a0, ft1, ft1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: bnez a0, .LBB8_2 ; RV64IF-NEXT: # %bb.1: # %if.else ; RV64IF-NEXT: ld ra, 8(sp) @@ -635,11 +635,11 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft0 -; RV32IF-NEXT: and a0, a0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: feq.s a0, ft1, ft1 +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: seqz a0, a0 ; RV32IF-NEXT: bnez a0, .LBB15_2 ; RV32IF-NEXT: # %bb.1: # %if.else @@ -653,11 +653,11 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft0 -; RV64IF-NEXT: and a0, a0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: feq.s a0, ft1, ft1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: seqz a0, a0 ; RV64IF-NEXT: bnez a0, .LBB15_2 ; RV64IF-NEXT: # %bb.1: # %if.else diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -97,17 +97,17 @@ define float @fmv_w_x(i32 %a, i32 %b) nounwind { ; RV32IF-LABEL: fmv_w_x: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fmv_w_x: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: fadd.s ft0, ft1, ft0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret ; Ensure fmv.w.x is generated even for a soft float calling convention diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll @@ -146,20 +146,20 @@ define i32 @fcmp_ord(float %a, float %b) nounwind { ; RV32IF-LABEL: fcmp_ord: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft0 -; RV32IF-NEXT: and a0, a0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: feq.s a0, ft1, ft1 +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_ord: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft0 -; RV64IF-NEXT: and a0, a0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: feq.s a0, ft1, ft1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: ret %1 = fcmp ord float %a, %b %2 = zext i1 %1 to i32 @@ -303,21 +303,21 @@ define i32 @fcmp_uno(float %a, float %b) nounwind { ; RV32IF-LABEL: fcmp_uno: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft0 -; RV32IF-NEXT: and a0, a0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: feq.s a0, ft1, ft1 +; RV32IF-NEXT: feq.s a1, ft0, ft0 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: seqz a0, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_uno: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft0 -; RV64IF-NEXT: and a0, a0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: feq.s a0, ft1, ft1 +; RV64IF-NEXT: feq.s a1, ft0, ft0 +; RV64IF-NEXT: and a0, a1, a0 ; RV64IF-NEXT: seqz a0, a0 ; RV64IF-NEXT: ret %1 = fcmp uno float %a, %b diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll --- a/llvm/test/CodeGen/RISCV/float-imm.ll +++ b/llvm/test/CodeGen/RISCV/float-imm.ll @@ -26,21 +26,21 @@ ; TODO: addi should be folded in to the flw ; RV32IF-LABEL: float_imm_op: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IF-NEXT: addi a0, a0, %lo(.LCPI1_0) -; RV32IF-NEXT: flw ft1, 0(a0) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IF-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV32IF-NEXT: flw ft0, 0(a1) +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: float_imm_op: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IF-NEXT: addi a0, a0, %lo(.LCPI1_0) -; RV64IF-NEXT: flw ft1, 0(a0) -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: lui a1, %hi(.LCPI1_0) +; RV64IF-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV64IF-NEXT: flw ft0, 0(a1) +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: fadd.s ft0, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret %1 = fadd float %a, 1.0 diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -108,17 +108,16 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) ; RV32IF-NEXT: sw s0, 8(sp) -; RV32IF-NEXT: sw s1, 4(sp) ; RV32IF-NEXT: mv s0, a0 ; RV32IF-NEXT: call sinf -; RV32IF-NEXT: mv s1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fsw ft0, 4(sp) ; RV32IF-NEXT: mv a0, s0 ; RV32IF-NEXT: call cosf ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, s1 +; RV32IF-NEXT: flw ft1, 4(sp) ; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 -; RV32IF-NEXT: lw s1, 4(sp) ; RV32IF-NEXT: lw s0, 8(sp) ; RV32IF-NEXT: lw ra, 12(sp) ; RV32IF-NEXT: addi sp, sp, 16 @@ -129,17 +128,16 @@ ; RV64IF-NEXT: addi sp, sp, -32 ; RV64IF-NEXT: sd ra, 24(sp) ; RV64IF-NEXT: sd s0, 16(sp) -; RV64IF-NEXT: sd s1, 8(sp) ; RV64IF-NEXT: mv s0, a0 ; RV64IF-NEXT: call sinf -; RV64IF-NEXT: mv s1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fsw ft0, 12(sp) ; RV64IF-NEXT: mv a0, s0 ; RV64IF-NEXT: call cosf ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fmv.w.x ft1, s1 +; RV64IF-NEXT: flw ft1, 12(sp) ; RV64IF-NEXT: fadd.s ft0, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 -; RV64IF-NEXT: ld s1, 8(sp) ; RV64IF-NEXT: ld s0, 16(sp) ; RV64IF-NEXT: ld ra, 24(sp) ; RV64IF-NEXT: addi sp, sp, 32 @@ -324,21 +322,21 @@ ; Use of fmadd depends on TargetLowering::isFMAFasterthanFMulAndFAdd ; RV32IF-LABEL: fmuladd_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a1 -; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: fmul.s ft0, ft1, ft0 -; RV32IF-NEXT: fmv.w.x ft1, a2 -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmul.s ft1, ft2, ft1 +; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fmuladd_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: fmul.s ft0, ft1, ft0 -; RV64IF-NEXT: fmv.w.x ft1, a2 -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: fmv.w.x ft0, a2 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fmv.w.x ft2, a0 +; RV64IF-NEXT: fmul.s ft1, ft2, ft1 +; RV64IF-NEXT: fadd.s ft0, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll --- a/llvm/test/CodeGen/RISCV/float-mem.ll +++ b/llvm/test/CodeGen/RISCV/float-mem.ll @@ -7,17 +7,17 @@ define float @flw(float *%a) nounwind { ; RV32IF-LABEL: flw: ; RV32IF: # %bb.0: -; RV32IF-NEXT: flw ft0, 12(a0) -; RV32IF-NEXT: flw ft1, 0(a0) -; RV32IF-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-NEXT: flw ft0, 0(a0) +; RV32IF-NEXT: flw ft1, 12(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: flw: ; RV64IF: # %bb.0: -; RV64IF-NEXT: flw ft0, 12(a0) -; RV64IF-NEXT: flw ft1, 0(a0) -; RV64IF-NEXT: fadd.s ft0, ft1, ft0 +; RV64IF-NEXT: flw ft0, 0(a0) +; RV64IF-NEXT: flw ft1, 12(a0) +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret %1 = load float, float* %a @@ -37,8 +37,8 @@ ; RV32IF-NEXT: fmv.w.x ft0, a2 ; RV32IF-NEXT: fmv.w.x ft1, a1 ; RV32IF-NEXT: fadd.s ft0, ft1, ft0 -; RV32IF-NEXT: fsw ft0, 32(a0) ; RV32IF-NEXT: fsw ft0, 0(a0) +; RV32IF-NEXT: fsw ft0, 32(a0) ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fsw: @@ -46,8 +46,8 @@ ; RV64IF-NEXT: fmv.w.x ft0, a2 ; RV64IF-NEXT: fmv.w.x ft1, a1 ; RV64IF-NEXT: fadd.s ft0, ft1, ft0 -; RV64IF-NEXT: fsw ft0, 32(a0) ; RV64IF-NEXT: fsw ft0, 0(a0) +; RV64IF-NEXT: fsw ft0, 32(a0) ; RV64IF-NEXT: ret %1 = fadd float %b, %c store float %1, float* %a @@ -70,10 +70,10 @@ ; RV32IF-NEXT: lui a0, %hi(G) ; RV32IF-NEXT: flw ft1, %lo(G)(a0) ; RV32IF-NEXT: fsw ft0, %lo(G)(a0) -; RV32IF-NEXT: addi a0, a0, %lo(G) -; RV32IF-NEXT: flw ft1, 36(a0) -; RV32IF-NEXT: fsw ft0, 36(a0) +; RV32IF-NEXT: addi a1, a0, %lo(G) +; RV32IF-NEXT: flw ft1, 36(a1) ; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: fsw ft0, 36(a1) ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: flw_fsw_global: @@ -84,10 +84,10 @@ ; RV64IF-NEXT: lui a0, %hi(G) ; RV64IF-NEXT: flw ft1, %lo(G)(a0) ; RV64IF-NEXT: fsw ft0, %lo(G)(a0) -; RV64IF-NEXT: addi a0, a0, %lo(G) -; RV64IF-NEXT: flw ft1, 36(a0) -; RV64IF-NEXT: fsw ft0, 36(a0) +; RV64IF-NEXT: addi a1, a0, %lo(G) +; RV64IF-NEXT: flw ft1, 36(a1) ; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: fsw ft0, 36(a1) ; RV64IF-NEXT: ret %1 = fadd float %a, %b %2 = load volatile float, float* @G @@ -102,24 +102,24 @@ define float @flw_fsw_constant(float %a) nounwind { ; RV32IF-LABEL: flw_fsw_constant: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: lui a0, 912092 -; RV32IF-NEXT: flw ft1, -273(a0) -; RV32IF-NEXT: fadd.s ft0, ft0, ft1 -; RV32IF-NEXT: fsw ft0, -273(a0) +; RV32IF-NEXT: lui a1, 912092 +; RV32IF-NEXT: flw ft0, -273(a1) +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: fsw ft0, -273(a1) ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: flw_fsw_constant: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: lui a0, 56 -; RV64IF-NEXT: addiw a0, a0, -1353 -; RV64IF-NEXT: slli a0, a0, 14 -; RV64IF-NEXT: flw ft1, -273(a0) -; RV64IF-NEXT: fadd.s ft0, ft0, ft1 -; RV64IF-NEXT: fsw ft0, -273(a0) +; RV64IF-NEXT: lui a1, 56 +; RV64IF-NEXT: addiw a1, a1, -1353 +; RV64IF-NEXT: slli a1, a1, 14 +; RV64IF-NEXT: flw ft0, -273(a1) +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: fadd.s ft0, ft1, ft0 ; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: fsw ft0, -273(a1) ; RV64IF-NEXT: ret %1 = inttoptr i32 3735928559 to float* %2 = load volatile float, float* %1 @@ -135,34 +135,32 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) -; RV32IF-NEXT: sw s0, 8(sp) -; RV32IF-NEXT: mv s0, a0 -; RV32IF-NEXT: addi a0, sp, 4 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fsw ft0, 4(sp) +; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call notdead -; RV32IF-NEXT: fmv.w.x ft0, s0 +; RV32IF-NEXT: flw ft0, 8(sp) ; RV32IF-NEXT: flw ft1, 4(sp) -; RV32IF-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 ; RV32IF-NEXT: fmv.x.w a0, ft0 -; RV32IF-NEXT: lw s0, 8(sp) ; RV32IF-NEXT: lw ra, 12(sp) ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: flw_stack: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -32 -; RV64IF-NEXT: sd ra, 24(sp) -; RV64IF-NEXT: sd s0, 16(sp) -; RV64IF-NEXT: mv s0, a0 -; RV64IF-NEXT: addi a0, sp, 12 +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(sp) +; RV64IF-NEXT: addi a0, sp, 4 ; RV64IF-NEXT: call notdead -; RV64IF-NEXT: fmv.w.x ft0, s0 -; RV64IF-NEXT: flw ft1, 12(sp) -; RV64IF-NEXT: fadd.s ft0, ft1, ft0 +; RV64IF-NEXT: flw ft0, 4(sp) +; RV64IF-NEXT: flw ft1, 0(sp) +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 ; RV64IF-NEXT: fmv.x.w a0, ft0 -; RV64IF-NEXT: ld s0, 16(sp) -; RV64IF-NEXT: ld ra, 24(sp) -; RV64IF-NEXT: addi sp, sp, 32 +; RV64IF-NEXT: ld ra, 8(sp) +; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret %1 = alloca float, align 4 %2 = bitcast float* %1 to i8* diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -240,14 +240,14 @@ define float @select_fcmp_ueq(float %a, float %b) nounwind { ; RV32IF-LABEL: select_fcmp_ueq: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 ; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: feq.s a0, ft0, ft1 +; RV32IF-NEXT: feq.s a1, ft1, ft1 +; RV32IF-NEXT: feq.s a2, ft0, ft0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: bnez a0, .LBB8_2 ; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fmv.s ft0, ft1 @@ -257,14 +257,14 @@ ; ; RV64IF-LABEL: select_fcmp_ueq: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: seqz a0, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: feq.s a0, ft0, ft1 +; RV64IF-NEXT: feq.s a1, ft1, ft1 +; RV64IF-NEXT: feq.s a2, ft0, ft0 +; RV64IF-NEXT: and a1, a2, a1 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: or a0, a0, a1 ; RV64IF-NEXT: bnez a0, .LBB8_2 ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fmv.s ft0, ft1 @@ -486,24 +486,24 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: feq.s a0, ft1, ft0 -; RV32IF-NEXT: bnez a0, .LBB16_2 +; RV32IF-NEXT: feq.s a1, ft1, ft0 +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: bnez a1, .LBB16_2 ; RV32IF-NEXT: # %bb.1: -; RV32IF-NEXT: mv a2, a3 +; RV32IF-NEXT: mv a0, a3 ; RV32IF-NEXT: .LBB16_2: -; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: i32_select_fcmp_oeq: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a1 ; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: feq.s a0, ft1, ft0 -; RV64IF-NEXT: bnez a0, .LBB16_2 +; RV64IF-NEXT: feq.s a1, ft1, ft0 +; RV64IF-NEXT: mv a0, a2 +; RV64IF-NEXT: bnez a1, .LBB16_2 ; RV64IF-NEXT: # %bb.1: -; RV64IF-NEXT: mv a2, a3 +; RV64IF-NEXT: mv a0, a3 ; RV64IF-NEXT: .LBB16_2: -; RV64IF-NEXT: mv a0, a2 ; RV64IF-NEXT: ret %1 = fcmp oeq float %a, %b %2 = select i1 %1, i32 %c, i32 %d diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -13,28 +13,28 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) -; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: lw a1, %lo(y)(a0) -; RV32I-NEXT: sw a1, 8(sp) -; RV32I-NEXT: lui a1, %hi(x) -; RV32I-NEXT: lw a2, %lo(x)(a1) -; RV32I-NEXT: sw a2, 24(sp) -; RV32I-NEXT: addi a0, a0, %lo(y) -; RV32I-NEXT: lw a2, 12(a0) -; RV32I-NEXT: sw a2, 20(sp) -; RV32I-NEXT: lw a2, 8(a0) -; RV32I-NEXT: sw a2, 16(sp) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: addi a0, a1, %lo(x) -; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: lui a0, %hi(x) +; RV32I-NEXT: addi a1, a0, %lo(x) +; RV32I-NEXT: lw a6, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a0, %lo(x)(a0) +; RV32I-NEXT: lui a4, %hi(y) +; RV32I-NEXT: addi a5, a4, %lo(y) +; RV32I-NEXT: lw a2, 4(a5) +; RV32I-NEXT: lw a3, 8(a5) +; RV32I-NEXT: lw a5, 12(a5) +; RV32I-NEXT: lw a4, %lo(y)(a4) +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a5, 20(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a2, 12(sp) ; RV32I-NEXT: sw a1, 36(sp) -; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: sw a1, 32(sp) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a7, 32(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: call __netf2 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) @@ -52,39 +52,39 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -80 ; RV32I-NEXT: sw ra, 76(sp) -; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: lw a1, %lo(y)(a0) -; RV32I-NEXT: sw a1, 24(sp) -; RV32I-NEXT: lui a1, %hi(x) -; RV32I-NEXT: lw a2, %lo(x)(a1) -; RV32I-NEXT: sw a2, 40(sp) -; RV32I-NEXT: addi a0, a0, %lo(y) -; RV32I-NEXT: lw a2, 12(a0) -; RV32I-NEXT: sw a2, 36(sp) -; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: lui a0, %hi(x) +; RV32I-NEXT: addi a1, a0, %lo(x) +; RV32I-NEXT: lw a6, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a0, %lo(x)(a0) +; RV32I-NEXT: lui a4, %hi(y) +; RV32I-NEXT: addi a5, a4, %lo(y) +; RV32I-NEXT: lw a3, 4(a5) +; RV32I-NEXT: lw a2, 8(a5) +; RV32I-NEXT: lw a5, 12(a5) +; RV32I-NEXT: lw a4, %lo(y)(a4) +; RV32I-NEXT: sw a4, 24(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a5, 36(sp) ; RV32I-NEXT: sw a2, 32(sp) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: addi a0, a1, %lo(x) -; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: sw a3, 28(sp) ; RV32I-NEXT: sw a1, 52(sp) -; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: sw a1, 48(sp) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a7, 48(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: sw a6, 44(sp) ; RV32I-NEXT: call __addtf3 -; RV32I-NEXT: lw a0, 68(sp) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lw a0, 64(sp) -; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: lw a1, 56(sp) ; RV32I-NEXT: lw a0, 60(sp) +; RV32I-NEXT: lw a2, 64(sp) +; RV32I-NEXT: lw a3, 68(sp) +; RV32I-NEXT: sw a3, 20(sp) +; RV32I-NEXT: sw a2, 16(sp) ; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lw a0, 56(sp) -; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: sw a1, 8(sp) ; RV32I-NEXT: call __fixtfsi ; RV32I-NEXT: lw ra, 76(sp) ; RV32I-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll --- a/llvm/test/CodeGen/RISCV/frame-info.ll +++ b/llvm/test/CodeGen/RISCV/frame-info.ll @@ -38,12 +38,12 @@ ; RV64-NEXT: .cfi_offset s0, -16 ; RV64-NEXT: addi s0, sp, 16 ; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: addi a1, zero, 1 -; RV64-NEXT: slli a1, a1, 33 -; RV64-NEXT: addi a1, a1, -16 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: addi a0, a0, 15 +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: addi a1, a1, -16 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: sub a0, sp, a0 ; RV64-NEXT: mv sp, a0 diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll --- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll +++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll @@ -6,21 +6,21 @@ ; RV32I-LABEL: getSetCCResultType: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: lw a3, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) ; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: seqz a3, a3 +; RV32I-NEXT: seqz a4, a4 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a2, a2 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: lw a1, 0(a0) -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: ret entry: %0 = load <4 x i32>, <4 x i32>* %p, align 16 diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -12,10 +12,10 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a0, %hi(s) ; CHECK-NEXT: addi a0, a0, %lo(s) -; CHECK-NEXT: addi a1, zero, 20 -; CHECK-NEXT: sw a1, 164(a0) ; CHECK-NEXT: addi a1, zero, 10 ; CHECK-NEXT: sw a1, 160(a0) +; CHECK-NEXT: addi a1, zero, 20 +; CHECK-NEXT: sw a1, 164(a0) ; CHECK-NEXT: ret entry: store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 diff --git a/llvm/test/CodeGen/RISCV/imm-cse.ll b/llvm/test/CodeGen/RISCV/imm-cse.ll --- a/llvm/test/CodeGen/RISCV/imm-cse.ll +++ b/llvm/test/CodeGen/RISCV/imm-cse.ll @@ -10,19 +10,19 @@ define void @imm32_cse() nounwind { ; RV32I-LABEL: imm32_cse: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, 1 -; RV32I-NEXT: lui a1, %hi(src) -; RV32I-NEXT: lw a2, %lo(src)(a1) -; RV32I-NEXT: add a2, a2, a0 -; RV32I-NEXT: lui a3, %hi(dst) -; RV32I-NEXT: sw a2, %lo(dst)(a3) -; RV32I-NEXT: lw a2, %lo(src)(a1) -; RV32I-NEXT: add a2, a2, a0 +; RV32I-NEXT: lui a0, %hi(src) +; RV32I-NEXT: lw a1, %lo(src)(a0) +; RV32I-NEXT: lui a2, 1 ; RV32I-NEXT: addi a2, a2, 1 -; RV32I-NEXT: sw a2, %lo(dst)(a3) -; RV32I-NEXT: lw a1, %lo(src)(a1) -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: lui a3, %hi(dst) +; RV32I-NEXT: sw a1, %lo(dst)(a3) +; RV32I-NEXT: lw a1, %lo(src)(a0) +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: sw a1, %lo(dst)(a3) +; RV32I-NEXT: lw a0, %lo(src)(a0) +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: addi a0, a0, 2 ; RV32I-NEXT: sw a0, %lo(dst)(a3) ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -142,8 +142,8 @@ define i64 @imm64_3() nounwind { ; RV32I-LABEL: imm64_3: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: addi a1, zero, 1 +; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret ; ; RV64I-LABEL: imm64_3: @@ -157,8 +157,8 @@ define i64 @imm64_4() nounwind { ; RV32I-LABEL: imm64_4: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret ; ; RV64I-LABEL: imm64_4: @@ -172,8 +172,8 @@ define i64 @imm64_5() nounwind { ; RV32I-LABEL: imm64_5: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret ; ; RV64I-LABEL: imm64_5: @@ -249,7 +249,7 @@ ; RV32I-LABEL: imm64_9: ; RV32I: # %bb.0: ; RV32I-NEXT: addi a0, zero, -1 -; RV32I-NEXT: addi a1, zero, -1 +; RV32I-NEXT: addi a1, zero, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: imm64_9: diff --git a/llvm/test/CodeGen/RISCV/indirectbr.ll b/llvm/test/CodeGen/RISCV/indirectbr.ll --- a/llvm/test/CodeGen/RISCV/indirectbr.ll +++ b/llvm/test/CodeGen/RISCV/indirectbr.ll @@ -8,7 +8,7 @@ ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) ; RV32I-NEXT: jr a0 -; RV32I-NEXT: .LBB0_1: +; RV32I-NEXT: .LBB0_1: # %test_label ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 @@ -26,7 +26,7 @@ ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) ; RV32I-NEXT: jr 1380(a0) -; RV32I-NEXT: .LBB1_1: +; RV32I-NEXT: .LBB1_1: # %test_label ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll --- a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll @@ -26,11 +26,11 @@ ; ; RV64F-LABEL: constraint_f_double: ; RV64F: # %bb.0: -; RV64F-NEXT: fmv.d.x ft0, a0 -; RV64F-NEXT: lui a0, %hi(gd) -; RV64F-NEXT: fld ft1, %lo(gd)(a0) +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld ft0, %lo(gd)(a1) +; RV64F-NEXT: fmv.d.x ft1, a0 ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.d ft0, ft0, ft1 +; RV64F-NEXT: fadd.d ft0, ft1, ft0 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.d a0, ft0 ; RV64F-NEXT: ret @@ -59,9 +59,9 @@ ; ; RV64F-LABEL: constraint_f_double_abi_name: ; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld fs0, %lo(gd)(a1) ; RV64F-NEXT: fmv.d.x fa1, a0 -; RV64F-NEXT: lui a0, %hi(gd) -; RV64F-NEXT: fld fs0, %lo(gd)(a0) ; RV64F-NEXT: #APP ; RV64F-NEXT: fadd.d ft0, fa1, fs0 ; RV64F-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll --- a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll @@ -9,22 +9,22 @@ define float @constraint_f_float(float %a) nounwind { ; RV32F-LABEL: constraint_f_float: ; RV32F: # %bb.0: -; RV32F-NEXT: fmv.w.x ft0, a0 -; RV32F-NEXT: lui a0, %hi(gf) -; RV32F-NEXT: flw ft1, %lo(gf)(a0) +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw ft0, %lo(gf)(a1) +; RV32F-NEXT: fmv.w.x ft1, a0 ; RV32F-NEXT: #APP -; RV32F-NEXT: fadd.s ft0, ft0, ft1 +; RV32F-NEXT: fadd.s ft0, ft1, ft0 ; RV32F-NEXT: #NO_APP ; RV32F-NEXT: fmv.x.w a0, ft0 ; RV32F-NEXT: ret ; ; RV64F-LABEL: constraint_f_float: ; RV64F: # %bb.0: -; RV64F-NEXT: fmv.w.x ft0, a0 -; RV64F-NEXT: lui a0, %hi(gf) -; RV64F-NEXT: flw ft1, %lo(gf)(a0) +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw ft0, %lo(gf)(a1) +; RV64F-NEXT: fmv.w.x ft1, a0 ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.s ft0, ft0, ft1 +; RV64F-NEXT: fadd.s ft0, ft1, ft0 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.w a0, ft0 ; RV64F-NEXT: ret @@ -36,9 +36,9 @@ define float @constraint_f_float_abi_name(float %a) nounwind { ; RV32F-LABEL: constraint_f_float_abi_name: ; RV32F: # %bb.0: +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw fs0, %lo(gf)(a1) ; RV32F-NEXT: fmv.w.x fa0, a0 -; RV32F-NEXT: lui a0, %hi(gf) -; RV32F-NEXT: flw fs0, %lo(gf)(a0) ; RV32F-NEXT: #APP ; RV32F-NEXT: fadd.s ft0, fa0, fs0 ; RV32F-NEXT: #NO_APP @@ -47,9 +47,9 @@ ; ; RV64F-LABEL: constraint_f_float_abi_name: ; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw fs0, %lo(gf)(a1) ; RV64F-NEXT: fmv.w.x fa0, a0 -; RV64F-NEXT: lui a0, %hi(gf) -; RV64F-NEXT: flw fs0, %lo(gf)(a0) ; RV64F-NEXT: #APP ; RV64F-NEXT: fadd.s ft0, fa0, fs0 ; RV64F-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll --- a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -182,6 +182,13 @@ ; RV32I-NEXT: add a0, a0, zero ; RV32I-NEXT: #NO_APP ; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_z_zero: +; RV64I: # %bb.0: +; RV64I-NEXT: #APP +; RV64I-NEXT: add a0, a0, zero +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret %1 = tail call i32 asm "add $0, $1, ${2:z}", "=r,r,r"(i32 %a, i32 0) ret i32 %1 } @@ -194,6 +201,14 @@ ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: #NO_APP ; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_z_nonzero: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, zero, 1 +; RV64I-NEXT: #APP +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret %1 = tail call i32 asm "add $0, $1, ${2:z}", "=r,r,r"(i32 %a, i32 1) ret i32 %1 } @@ -205,6 +220,13 @@ ; RV32I-NEXT: addi a0, a0, 1 ; RV32I-NEXT: #NO_APP ; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_i_imm: +; RV64I: # %bb.0: +; RV64I-NEXT: #APP +; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret %1 = tail call i32 asm "add${2:i} $0, $1, $2", "=r,r,ri"(i32 %a, i32 1) ret i32 %1 } @@ -216,6 +238,13 @@ ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: #NO_APP ; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_i_reg: +; RV64I: # %bb.0: +; RV64I-NEXT: #APP +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret %1 = tail call i32 asm "add${2:i} $0, $1, $2", "=r,r,ri"(i32 %a, i32 %b) ret i32 %1 } diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll --- a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll @@ -40,6 +40,39 @@ ; CHECK-RV32-NEXT: addi sp, sp, 16 ; CHECK-RV32-NEXT: mret ; +; CHECK-RV32-F-LABEL: foo_i32: +; CHECK-RV32-F: # %bb.0: +; CHECK-RV32-F-NEXT: addi sp, sp, -16 +; CHECK-RV32-F-NEXT: sw a0, 12(sp) +; CHECK-RV32-F-NEXT: sw a1, 8(sp) +; CHECK-RV32-F-NEXT: lui a0, %hi(a) +; CHECK-RV32-F-NEXT: lw a0, %lo(a)(a0) +; CHECK-RV32-F-NEXT: lui a1, %hi(b) +; CHECK-RV32-F-NEXT: lw a1, %lo(b)(a1) +; CHECK-RV32-F-NEXT: add a0, a1, a0 +; CHECK-RV32-F-NEXT: lui a1, %hi(c) +; CHECK-RV32-F-NEXT: sw a0, %lo(c)(a1) +; CHECK-RV32-F-NEXT: lw a1, 8(sp) +; CHECK-RV32-F-NEXT: lw a0, 12(sp) +; CHECK-RV32-F-NEXT: addi sp, sp, 16 +; CHECK-RV32-F-NEXT: mret +; +; CHECK-RV32-FD-LABEL: foo_i32: +; CHECK-RV32-FD: # %bb.0: +; CHECK-RV32-FD-NEXT: addi sp, sp, -16 +; CHECK-RV32-FD-NEXT: sw a0, 12(sp) +; CHECK-RV32-FD-NEXT: sw a1, 8(sp) +; CHECK-RV32-FD-NEXT: lui a0, %hi(a) +; CHECK-RV32-FD-NEXT: lw a0, %lo(a)(a0) +; CHECK-RV32-FD-NEXT: lui a1, %hi(b) +; CHECK-RV32-FD-NEXT: lw a1, %lo(b)(a1) +; CHECK-RV32-FD-NEXT: add a0, a1, a0 +; CHECK-RV32-FD-NEXT: lui a1, %hi(c) +; CHECK-RV32-FD-NEXT: sw a0, %lo(c)(a1) +; CHECK-RV32-FD-NEXT: lw a1, 8(sp) +; CHECK-RV32-FD-NEXT: lw a0, 12(sp) +; CHECK-RV32-FD-NEXT: addi sp, sp, 16 +; CHECK-RV32-FD-NEXT: mret %1 = load i32, i32* @a %2 = load i32, i32* @b %add = add nsw i32 %2, %1 @@ -74,6 +107,49 @@ ; CHECK-RV32-NEXT: addi sp, sp, 16 ; CHECK-RV32-NEXT: mret ; +; CHECK-RV32-F-LABEL: foo_fp_i32: +; CHECK-RV32-F: # %bb.0: +; CHECK-RV32-F-NEXT: addi sp, sp, -16 +; CHECK-RV32-F-NEXT: sw ra, 12(sp) +; CHECK-RV32-F-NEXT: sw s0, 8(sp) +; CHECK-RV32-F-NEXT: sw a0, 4(sp) +; CHECK-RV32-F-NEXT: sw a1, 0(sp) +; CHECK-RV32-F-NEXT: addi s0, sp, 16 +; CHECK-RV32-F-NEXT: lui a0, %hi(a) +; CHECK-RV32-F-NEXT: lw a0, %lo(a)(a0) +; CHECK-RV32-F-NEXT: lui a1, %hi(b) +; CHECK-RV32-F-NEXT: lw a1, %lo(b)(a1) +; CHECK-RV32-F-NEXT: add a0, a1, a0 +; CHECK-RV32-F-NEXT: lui a1, %hi(c) +; CHECK-RV32-F-NEXT: sw a0, %lo(c)(a1) +; CHECK-RV32-F-NEXT: lw a1, 0(sp) +; CHECK-RV32-F-NEXT: lw a0, 4(sp) +; CHECK-RV32-F-NEXT: lw s0, 8(sp) +; CHECK-RV32-F-NEXT: lw ra, 12(sp) +; CHECK-RV32-F-NEXT: addi sp, sp, 16 +; CHECK-RV32-F-NEXT: mret +; +; CHECK-RV32-FD-LABEL: foo_fp_i32: +; CHECK-RV32-FD: # %bb.0: +; CHECK-RV32-FD-NEXT: addi sp, sp, -16 +; CHECK-RV32-FD-NEXT: sw ra, 12(sp) +; CHECK-RV32-FD-NEXT: sw s0, 8(sp) +; CHECK-RV32-FD-NEXT: sw a0, 4(sp) +; CHECK-RV32-FD-NEXT: sw a1, 0(sp) +; CHECK-RV32-FD-NEXT: addi s0, sp, 16 +; CHECK-RV32-FD-NEXT: lui a0, %hi(a) +; CHECK-RV32-FD-NEXT: lw a0, %lo(a)(a0) +; CHECK-RV32-FD-NEXT: lui a1, %hi(b) +; CHECK-RV32-FD-NEXT: lw a1, %lo(b)(a1) +; CHECK-RV32-FD-NEXT: add a0, a1, a0 +; CHECK-RV32-FD-NEXT: lui a1, %hi(c) +; CHECK-RV32-FD-NEXT: sw a0, %lo(c)(a1) +; CHECK-RV32-FD-NEXT: lw a1, 0(sp) +; CHECK-RV32-FD-NEXT: lw a0, 4(sp) +; CHECK-RV32-FD-NEXT: lw s0, 8(sp) +; CHECK-RV32-FD-NEXT: lw ra, 12(sp) +; CHECK-RV32-FD-NEXT: addi sp, sp, 16 +; CHECK-RV32-FD-NEXT: mret %1 = load i32, i32* @a %2 = load i32, i32* @b %add = add nsw i32 %2, %1 @@ -86,17 +162,62 @@ @d = external global float define void @foo_float() nounwind #0 { +; CHECK-RV32-LABEL: foo_float: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -64 +; CHECK-RV32-NEXT: sw ra, 60(sp) +; CHECK-RV32-NEXT: sw t0, 56(sp) +; CHECK-RV32-NEXT: sw t1, 52(sp) +; CHECK-RV32-NEXT: sw t2, 48(sp) +; CHECK-RV32-NEXT: sw a0, 44(sp) +; CHECK-RV32-NEXT: sw a1, 40(sp) +; CHECK-RV32-NEXT: sw a2, 36(sp) +; CHECK-RV32-NEXT: sw a3, 32(sp) +; CHECK-RV32-NEXT: sw a4, 28(sp) +; CHECK-RV32-NEXT: sw a5, 24(sp) +; CHECK-RV32-NEXT: sw a6, 20(sp) +; CHECK-RV32-NEXT: sw a7, 16(sp) +; CHECK-RV32-NEXT: sw t3, 12(sp) +; CHECK-RV32-NEXT: sw t4, 8(sp) +; CHECK-RV32-NEXT: sw t5, 4(sp) +; CHECK-RV32-NEXT: sw t6, 0(sp) +; CHECK-RV32-NEXT: lui a0, %hi(e) +; CHECK-RV32-NEXT: lw a0, %lo(e)(a0) +; CHECK-RV32-NEXT: lui a1, %hi(f) +; CHECK-RV32-NEXT: lw a1, %lo(f)(a1) +; CHECK-RV32-NEXT: call __addsf3 +; CHECK-RV32-NEXT: lui a1, %hi(d) +; CHECK-RV32-NEXT: sw a0, %lo(d)(a1) +; CHECK-RV32-NEXT: lw t6, 0(sp) +; CHECK-RV32-NEXT: lw t5, 4(sp) +; CHECK-RV32-NEXT: lw t4, 8(sp) +; CHECK-RV32-NEXT: lw t3, 12(sp) +; CHECK-RV32-NEXT: lw a7, 16(sp) +; CHECK-RV32-NEXT: lw a6, 20(sp) +; CHECK-RV32-NEXT: lw a5, 24(sp) +; CHECK-RV32-NEXT: lw a4, 28(sp) +; CHECK-RV32-NEXT: lw a3, 32(sp) +; CHECK-RV32-NEXT: lw a2, 36(sp) +; CHECK-RV32-NEXT: lw a1, 40(sp) +; CHECK-RV32-NEXT: lw a0, 44(sp) +; CHECK-RV32-NEXT: lw t2, 48(sp) +; CHECK-RV32-NEXT: lw t1, 52(sp) +; CHECK-RV32-NEXT: lw t0, 56(sp) +; CHECK-RV32-NEXT: lw ra, 60(sp) +; CHECK-RV32-NEXT: addi sp, sp, 64 +; CHECK-RV32-NEXT: mret +; ; CHECK-RV32-F-LABEL: foo_float: ; CHECK-RV32-F: # %bb.0: ; CHECK-RV32-F-NEXT: addi sp, sp, -16 ; CHECK-RV32-F-NEXT: sw a0, 12(sp) ; CHECK-RV32-F-NEXT: fsw ft0, 8(sp) ; CHECK-RV32-F-NEXT: fsw ft1, 4(sp) -; CHECK-RV32-F-NEXT: lui a0, %hi(f) -; CHECK-RV32-F-NEXT: flw ft0, %lo(f)(a0) ; CHECK-RV32-F-NEXT: lui a0, %hi(e) -; CHECK-RV32-F-NEXT: flw ft1, %lo(e)(a0) -; CHECK-RV32-F-NEXT: fadd.s ft0, ft1, ft0 +; CHECK-RV32-F-NEXT: flw ft0, %lo(e)(a0) +; CHECK-RV32-F-NEXT: lui a0, %hi(f) +; CHECK-RV32-F-NEXT: flw ft1, %lo(f)(a0) +; CHECK-RV32-F-NEXT: fadd.s ft0, ft0, ft1 ; CHECK-RV32-F-NEXT: lui a0, %hi(d) ; CHECK-RV32-F-NEXT: fsw ft0, %lo(d)(a0) ; CHECK-RV32-F-NEXT: flw ft1, 4(sp) @@ -105,6 +226,24 @@ ; CHECK-RV32-F-NEXT: addi sp, sp, 16 ; CHECK-RV32-F-NEXT: mret ; +; CHECK-RV32-FD-LABEL: foo_float: +; CHECK-RV32-FD: # %bb.0: +; CHECK-RV32-FD-NEXT: addi sp, sp, -32 +; CHECK-RV32-FD-NEXT: sw a0, 28(sp) +; CHECK-RV32-FD-NEXT: fsd ft0, 16(sp) +; CHECK-RV32-FD-NEXT: fsd ft1, 8(sp) +; CHECK-RV32-FD-NEXT: lui a0, %hi(e) +; CHECK-RV32-FD-NEXT: flw ft0, %lo(e)(a0) +; CHECK-RV32-FD-NEXT: lui a0, %hi(f) +; CHECK-RV32-FD-NEXT: flw ft1, %lo(f)(a0) +; CHECK-RV32-FD-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-RV32-FD-NEXT: lui a0, %hi(d) +; CHECK-RV32-FD-NEXT: fsw ft0, %lo(d)(a0) +; CHECK-RV32-FD-NEXT: fld ft1, 8(sp) +; CHECK-RV32-FD-NEXT: fld ft0, 16(sp) +; CHECK-RV32-FD-NEXT: lw a0, 28(sp) +; CHECK-RV32-FD-NEXT: addi sp, sp, 32 +; CHECK-RV32-FD-NEXT: mret %1 = load float, float* @e %2 = load float, float* @f %add = fadd float %1, %2 @@ -116,6 +255,54 @@ ; Additionally check frame pointer and return address are properly saved. ; define void @foo_fp_float() nounwind #1 { +; CHECK-RV32-LABEL: foo_fp_float: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -80 +; CHECK-RV32-NEXT: sw ra, 76(sp) +; CHECK-RV32-NEXT: sw t0, 72(sp) +; CHECK-RV32-NEXT: sw t1, 68(sp) +; CHECK-RV32-NEXT: sw t2, 64(sp) +; CHECK-RV32-NEXT: sw s0, 60(sp) +; CHECK-RV32-NEXT: sw a0, 56(sp) +; CHECK-RV32-NEXT: sw a1, 52(sp) +; CHECK-RV32-NEXT: sw a2, 48(sp) +; CHECK-RV32-NEXT: sw a3, 44(sp) +; CHECK-RV32-NEXT: sw a4, 40(sp) +; CHECK-RV32-NEXT: sw a5, 36(sp) +; CHECK-RV32-NEXT: sw a6, 32(sp) +; CHECK-RV32-NEXT: sw a7, 28(sp) +; CHECK-RV32-NEXT: sw t3, 24(sp) +; CHECK-RV32-NEXT: sw t4, 20(sp) +; CHECK-RV32-NEXT: sw t5, 16(sp) +; CHECK-RV32-NEXT: sw t6, 12(sp) +; CHECK-RV32-NEXT: addi s0, sp, 80 +; CHECK-RV32-NEXT: lui a0, %hi(e) +; CHECK-RV32-NEXT: lw a0, %lo(e)(a0) +; CHECK-RV32-NEXT: lui a1, %hi(f) +; CHECK-RV32-NEXT: lw a1, %lo(f)(a1) +; CHECK-RV32-NEXT: call __addsf3 +; CHECK-RV32-NEXT: lui a1, %hi(d) +; CHECK-RV32-NEXT: sw a0, %lo(d)(a1) +; CHECK-RV32-NEXT: lw t6, 12(sp) +; CHECK-RV32-NEXT: lw t5, 16(sp) +; CHECK-RV32-NEXT: lw t4, 20(sp) +; CHECK-RV32-NEXT: lw t3, 24(sp) +; CHECK-RV32-NEXT: lw a7, 28(sp) +; CHECK-RV32-NEXT: lw a6, 32(sp) +; CHECK-RV32-NEXT: lw a5, 36(sp) +; CHECK-RV32-NEXT: lw a4, 40(sp) +; CHECK-RV32-NEXT: lw a3, 44(sp) +; CHECK-RV32-NEXT: lw a2, 48(sp) +; CHECK-RV32-NEXT: lw a1, 52(sp) +; CHECK-RV32-NEXT: lw a0, 56(sp) +; CHECK-RV32-NEXT: lw s0, 60(sp) +; CHECK-RV32-NEXT: lw t2, 64(sp) +; CHECK-RV32-NEXT: lw t1, 68(sp) +; CHECK-RV32-NEXT: lw t0, 72(sp) +; CHECK-RV32-NEXT: lw ra, 76(sp) +; CHECK-RV32-NEXT: addi sp, sp, 80 +; CHECK-RV32-NEXT: mret +; ; CHECK-RV32-F-LABEL: foo_fp_float: ; CHECK-RV32-F: # %bb.0: ; CHECK-RV32-F-NEXT: addi sp, sp, -32 @@ -125,11 +312,11 @@ ; CHECK-RV32-F-NEXT: fsw ft0, 16(sp) ; CHECK-RV32-F-NEXT: fsw ft1, 12(sp) ; CHECK-RV32-F-NEXT: addi s0, sp, 32 -; CHECK-RV32-F-NEXT: lui a0, %hi(f) -; CHECK-RV32-F-NEXT: flw ft0, %lo(f)(a0) ; CHECK-RV32-F-NEXT: lui a0, %hi(e) -; CHECK-RV32-F-NEXT: flw ft1, %lo(e)(a0) -; CHECK-RV32-F-NEXT: fadd.s ft0, ft1, ft0 +; CHECK-RV32-F-NEXT: flw ft0, %lo(e)(a0) +; CHECK-RV32-F-NEXT: lui a0, %hi(f) +; CHECK-RV32-F-NEXT: flw ft1, %lo(f)(a0) +; CHECK-RV32-F-NEXT: fadd.s ft0, ft0, ft1 ; CHECK-RV32-F-NEXT: lui a0, %hi(d) ; CHECK-RV32-F-NEXT: fsw ft0, %lo(d)(a0) ; CHECK-RV32-F-NEXT: flw ft1, 12(sp) @@ -140,6 +327,29 @@ ; CHECK-RV32-F-NEXT: addi sp, sp, 32 ; CHECK-RV32-F-NEXT: mret ; +; CHECK-RV32-FD-LABEL: foo_fp_float: +; CHECK-RV32-FD: # %bb.0: +; CHECK-RV32-FD-NEXT: addi sp, sp, -32 +; CHECK-RV32-FD-NEXT: sw ra, 28(sp) +; CHECK-RV32-FD-NEXT: sw s0, 24(sp) +; CHECK-RV32-FD-NEXT: sw a0, 20(sp) +; CHECK-RV32-FD-NEXT: fsd ft0, 8(sp) +; CHECK-RV32-FD-NEXT: fsd ft1, 0(sp) +; CHECK-RV32-FD-NEXT: addi s0, sp, 32 +; CHECK-RV32-FD-NEXT: lui a0, %hi(e) +; CHECK-RV32-FD-NEXT: flw ft0, %lo(e)(a0) +; CHECK-RV32-FD-NEXT: lui a0, %hi(f) +; CHECK-RV32-FD-NEXT: flw ft1, %lo(f)(a0) +; CHECK-RV32-FD-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-RV32-FD-NEXT: lui a0, %hi(d) +; CHECK-RV32-FD-NEXT: fsw ft0, %lo(d)(a0) +; CHECK-RV32-FD-NEXT: fld ft1, 0(sp) +; CHECK-RV32-FD-NEXT: fld ft0, 8(sp) +; CHECK-RV32-FD-NEXT: lw a0, 20(sp) +; CHECK-RV32-FD-NEXT: lw s0, 24(sp) +; CHECK-RV32-FD-NEXT: lw ra, 28(sp) +; CHECK-RV32-FD-NEXT: addi sp, sp, 32 +; CHECK-RV32-FD-NEXT: mret %1 = load float, float* @e %2 = load float, float* @f %add = fadd float %1, %2 @@ -152,17 +362,183 @@ @g = external global double define void @foo_double() nounwind #0 { +; CHECK-RV32-LABEL: foo_double: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -64 +; CHECK-RV32-NEXT: sw ra, 60(sp) +; CHECK-RV32-NEXT: sw t0, 56(sp) +; CHECK-RV32-NEXT: sw t1, 52(sp) +; CHECK-RV32-NEXT: sw t2, 48(sp) +; CHECK-RV32-NEXT: sw a0, 44(sp) +; CHECK-RV32-NEXT: sw a1, 40(sp) +; CHECK-RV32-NEXT: sw a2, 36(sp) +; CHECK-RV32-NEXT: sw a3, 32(sp) +; CHECK-RV32-NEXT: sw a4, 28(sp) +; CHECK-RV32-NEXT: sw a5, 24(sp) +; CHECK-RV32-NEXT: sw a6, 20(sp) +; CHECK-RV32-NEXT: sw a7, 16(sp) +; CHECK-RV32-NEXT: sw t3, 12(sp) +; CHECK-RV32-NEXT: sw t4, 8(sp) +; CHECK-RV32-NEXT: sw t5, 4(sp) +; CHECK-RV32-NEXT: sw t6, 0(sp) +; CHECK-RV32-NEXT: lui a1, %hi(h) +; CHECK-RV32-NEXT: lw a0, %lo(h)(a1) +; CHECK-RV32-NEXT: addi a1, a1, %lo(h) +; CHECK-RV32-NEXT: lw a1, 4(a1) +; CHECK-RV32-NEXT: lui a3, %hi(i) +; CHECK-RV32-NEXT: lw a2, %lo(i)(a3) +; CHECK-RV32-NEXT: addi a3, a3, %lo(i) +; CHECK-RV32-NEXT: lw a3, 4(a3) +; CHECK-RV32-NEXT: call __adddf3 +; CHECK-RV32-NEXT: lui a2, %hi(g) +; CHECK-RV32-NEXT: addi a3, a2, %lo(g) +; CHECK-RV32-NEXT: sw a1, 4(a3) +; CHECK-RV32-NEXT: sw a0, %lo(g)(a2) +; CHECK-RV32-NEXT: lw t6, 0(sp) +; CHECK-RV32-NEXT: lw t5, 4(sp) +; CHECK-RV32-NEXT: lw t4, 8(sp) +; CHECK-RV32-NEXT: lw t3, 12(sp) +; CHECK-RV32-NEXT: lw a7, 16(sp) +; CHECK-RV32-NEXT: lw a6, 20(sp) +; CHECK-RV32-NEXT: lw a5, 24(sp) +; CHECK-RV32-NEXT: lw a4, 28(sp) +; CHECK-RV32-NEXT: lw a3, 32(sp) +; CHECK-RV32-NEXT: lw a2, 36(sp) +; CHECK-RV32-NEXT: lw a1, 40(sp) +; CHECK-RV32-NEXT: lw a0, 44(sp) +; CHECK-RV32-NEXT: lw t2, 48(sp) +; CHECK-RV32-NEXT: lw t1, 52(sp) +; CHECK-RV32-NEXT: lw t0, 56(sp) +; CHECK-RV32-NEXT: lw ra, 60(sp) +; CHECK-RV32-NEXT: addi sp, sp, 64 +; CHECK-RV32-NEXT: mret +; +; CHECK-RV32-F-LABEL: foo_double: +; CHECK-RV32-F: # %bb.0: +; CHECK-RV32-F-NEXT: addi sp, sp, -192 +; CHECK-RV32-F-NEXT: sw ra, 188(sp) +; CHECK-RV32-F-NEXT: sw t0, 184(sp) +; CHECK-RV32-F-NEXT: sw t1, 180(sp) +; CHECK-RV32-F-NEXT: sw t2, 176(sp) +; CHECK-RV32-F-NEXT: sw a0, 172(sp) +; CHECK-RV32-F-NEXT: sw a1, 168(sp) +; CHECK-RV32-F-NEXT: sw a2, 164(sp) +; CHECK-RV32-F-NEXT: sw a3, 160(sp) +; CHECK-RV32-F-NEXT: sw a4, 156(sp) +; CHECK-RV32-F-NEXT: sw a5, 152(sp) +; CHECK-RV32-F-NEXT: sw a6, 148(sp) +; CHECK-RV32-F-NEXT: sw a7, 144(sp) +; CHECK-RV32-F-NEXT: sw t3, 140(sp) +; CHECK-RV32-F-NEXT: sw t4, 136(sp) +; CHECK-RV32-F-NEXT: sw t5, 132(sp) +; CHECK-RV32-F-NEXT: sw t6, 128(sp) +; CHECK-RV32-F-NEXT: fsw ft0, 124(sp) +; CHECK-RV32-F-NEXT: fsw ft1, 120(sp) +; CHECK-RV32-F-NEXT: fsw ft2, 116(sp) +; CHECK-RV32-F-NEXT: fsw ft3, 112(sp) +; CHECK-RV32-F-NEXT: fsw ft4, 108(sp) +; CHECK-RV32-F-NEXT: fsw ft5, 104(sp) +; CHECK-RV32-F-NEXT: fsw ft6, 100(sp) +; CHECK-RV32-F-NEXT: fsw ft7, 96(sp) +; CHECK-RV32-F-NEXT: fsw fa0, 92(sp) +; CHECK-RV32-F-NEXT: fsw fa1, 88(sp) +; CHECK-RV32-F-NEXT: fsw fa2, 84(sp) +; CHECK-RV32-F-NEXT: fsw fa3, 80(sp) +; CHECK-RV32-F-NEXT: fsw fa4, 76(sp) +; CHECK-RV32-F-NEXT: fsw fa5, 72(sp) +; CHECK-RV32-F-NEXT: fsw fa6, 68(sp) +; CHECK-RV32-F-NEXT: fsw fa7, 64(sp) +; CHECK-RV32-F-NEXT: fsw ft8, 60(sp) +; CHECK-RV32-F-NEXT: fsw ft9, 56(sp) +; CHECK-RV32-F-NEXT: fsw ft10, 52(sp) +; CHECK-RV32-F-NEXT: fsw ft11, 48(sp) +; CHECK-RV32-F-NEXT: fsw fs0, 44(sp) +; CHECK-RV32-F-NEXT: fsw fs1, 40(sp) +; CHECK-RV32-F-NEXT: fsw fs2, 36(sp) +; CHECK-RV32-F-NEXT: fsw fs3, 32(sp) +; CHECK-RV32-F-NEXT: fsw fs4, 28(sp) +; CHECK-RV32-F-NEXT: fsw fs5, 24(sp) +; CHECK-RV32-F-NEXT: fsw fs6, 20(sp) +; CHECK-RV32-F-NEXT: fsw fs7, 16(sp) +; CHECK-RV32-F-NEXT: fsw fs8, 12(sp) +; CHECK-RV32-F-NEXT: fsw fs9, 8(sp) +; CHECK-RV32-F-NEXT: fsw fs10, 4(sp) +; CHECK-RV32-F-NEXT: fsw fs11, 0(sp) +; CHECK-RV32-F-NEXT: lui a1, %hi(h) +; CHECK-RV32-F-NEXT: lw a0, %lo(h)(a1) +; CHECK-RV32-F-NEXT: addi a1, a1, %lo(h) +; CHECK-RV32-F-NEXT: lw a1, 4(a1) +; CHECK-RV32-F-NEXT: lui a3, %hi(i) +; CHECK-RV32-F-NEXT: lw a2, %lo(i)(a3) +; CHECK-RV32-F-NEXT: addi a3, a3, %lo(i) +; CHECK-RV32-F-NEXT: lw a3, 4(a3) +; CHECK-RV32-F-NEXT: call __adddf3 +; CHECK-RV32-F-NEXT: lui a2, %hi(g) +; CHECK-RV32-F-NEXT: addi a3, a2, %lo(g) +; CHECK-RV32-F-NEXT: sw a1, 4(a3) +; CHECK-RV32-F-NEXT: sw a0, %lo(g)(a2) +; CHECK-RV32-F-NEXT: flw fs11, 0(sp) +; CHECK-RV32-F-NEXT: flw fs10, 4(sp) +; CHECK-RV32-F-NEXT: flw fs9, 8(sp) +; CHECK-RV32-F-NEXT: flw fs8, 12(sp) +; CHECK-RV32-F-NEXT: flw fs7, 16(sp) +; CHECK-RV32-F-NEXT: flw fs6, 20(sp) +; CHECK-RV32-F-NEXT: flw fs5, 24(sp) +; CHECK-RV32-F-NEXT: flw fs4, 28(sp) +; CHECK-RV32-F-NEXT: flw fs3, 32(sp) +; CHECK-RV32-F-NEXT: flw fs2, 36(sp) +; CHECK-RV32-F-NEXT: flw fs1, 40(sp) +; CHECK-RV32-F-NEXT: flw fs0, 44(sp) +; CHECK-RV32-F-NEXT: flw ft11, 48(sp) +; CHECK-RV32-F-NEXT: flw ft10, 52(sp) +; CHECK-RV32-F-NEXT: flw ft9, 56(sp) +; CHECK-RV32-F-NEXT: flw ft8, 60(sp) +; CHECK-RV32-F-NEXT: flw fa7, 64(sp) +; CHECK-RV32-F-NEXT: flw fa6, 68(sp) +; CHECK-RV32-F-NEXT: flw fa5, 72(sp) +; CHECK-RV32-F-NEXT: flw fa4, 76(sp) +; CHECK-RV32-F-NEXT: flw fa3, 80(sp) +; CHECK-RV32-F-NEXT: flw fa2, 84(sp) +; CHECK-RV32-F-NEXT: flw fa1, 88(sp) +; CHECK-RV32-F-NEXT: flw fa0, 92(sp) +; CHECK-RV32-F-NEXT: flw ft7, 96(sp) +; CHECK-RV32-F-NEXT: flw ft6, 100(sp) +; CHECK-RV32-F-NEXT: flw ft5, 104(sp) +; CHECK-RV32-F-NEXT: flw ft4, 108(sp) +; CHECK-RV32-F-NEXT: flw ft3, 112(sp) +; CHECK-RV32-F-NEXT: flw ft2, 116(sp) +; CHECK-RV32-F-NEXT: flw ft1, 120(sp) +; CHECK-RV32-F-NEXT: flw ft0, 124(sp) +; CHECK-RV32-F-NEXT: lw t6, 128(sp) +; CHECK-RV32-F-NEXT: lw t5, 132(sp) +; CHECK-RV32-F-NEXT: lw t4, 136(sp) +; CHECK-RV32-F-NEXT: lw t3, 140(sp) +; CHECK-RV32-F-NEXT: lw a7, 144(sp) +; CHECK-RV32-F-NEXT: lw a6, 148(sp) +; CHECK-RV32-F-NEXT: lw a5, 152(sp) +; CHECK-RV32-F-NEXT: lw a4, 156(sp) +; CHECK-RV32-F-NEXT: lw a3, 160(sp) +; CHECK-RV32-F-NEXT: lw a2, 164(sp) +; CHECK-RV32-F-NEXT: lw a1, 168(sp) +; CHECK-RV32-F-NEXT: lw a0, 172(sp) +; CHECK-RV32-F-NEXT: lw t2, 176(sp) +; CHECK-RV32-F-NEXT: lw t1, 180(sp) +; CHECK-RV32-F-NEXT: lw t0, 184(sp) +; CHECK-RV32-F-NEXT: lw ra, 188(sp) +; CHECK-RV32-F-NEXT: addi sp, sp, 192 +; CHECK-RV32-F-NEXT: mret +; ; CHECK-RV32-FD-LABEL: foo_double: ; CHECK-RV32-FD: # %bb.0: ; CHECK-RV32-FD-NEXT: addi sp, sp, -32 ; CHECK-RV32-FD-NEXT: sw a0, 28(sp) ; CHECK-RV32-FD-NEXT: fsd ft0, 16(sp) ; CHECK-RV32-FD-NEXT: fsd ft1, 8(sp) -; CHECK-RV32-FD-NEXT: lui a0, %hi(i) -; CHECK-RV32-FD-NEXT: fld ft0, %lo(i)(a0) ; CHECK-RV32-FD-NEXT: lui a0, %hi(h) -; CHECK-RV32-FD-NEXT: fld ft1, %lo(h)(a0) -; CHECK-RV32-FD-NEXT: fadd.d ft0, ft1, ft0 +; CHECK-RV32-FD-NEXT: fld ft0, %lo(h)(a0) +; CHECK-RV32-FD-NEXT: lui a0, %hi(i) +; CHECK-RV32-FD-NEXT: fld ft1, %lo(i)(a0) +; CHECK-RV32-FD-NEXT: fadd.d ft0, ft0, ft1 ; CHECK-RV32-FD-NEXT: lui a0, %hi(g) ; CHECK-RV32-FD-NEXT: fsd ft0, %lo(g)(a0) ; CHECK-RV32-FD-NEXT: fld ft1, 8(sp) @@ -170,7 +546,6 @@ ; CHECK-RV32-FD-NEXT: lw a0, 28(sp) ; CHECK-RV32-FD-NEXT: addi sp, sp, 32 ; CHECK-RV32-FD-NEXT: mret -; %1 = load double, double* @h %2 = load double, double* @i %add = fadd double %1, %2 @@ -182,6 +557,178 @@ ; Additionally check frame pointer and return address are properly saved. ; define void @foo_fp_double() nounwind #1 { +; CHECK-RV32-LABEL: foo_fp_double: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -80 +; CHECK-RV32-NEXT: sw ra, 76(sp) +; CHECK-RV32-NEXT: sw t0, 72(sp) +; CHECK-RV32-NEXT: sw t1, 68(sp) +; CHECK-RV32-NEXT: sw t2, 64(sp) +; CHECK-RV32-NEXT: sw s0, 60(sp) +; CHECK-RV32-NEXT: sw a0, 56(sp) +; CHECK-RV32-NEXT: sw a1, 52(sp) +; CHECK-RV32-NEXT: sw a2, 48(sp) +; CHECK-RV32-NEXT: sw a3, 44(sp) +; CHECK-RV32-NEXT: sw a4, 40(sp) +; CHECK-RV32-NEXT: sw a5, 36(sp) +; CHECK-RV32-NEXT: sw a6, 32(sp) +; CHECK-RV32-NEXT: sw a7, 28(sp) +; CHECK-RV32-NEXT: sw t3, 24(sp) +; CHECK-RV32-NEXT: sw t4, 20(sp) +; CHECK-RV32-NEXT: sw t5, 16(sp) +; CHECK-RV32-NEXT: sw t6, 12(sp) +; CHECK-RV32-NEXT: addi s0, sp, 80 +; CHECK-RV32-NEXT: lui a1, %hi(h) +; CHECK-RV32-NEXT: lw a0, %lo(h)(a1) +; CHECK-RV32-NEXT: addi a1, a1, %lo(h) +; CHECK-RV32-NEXT: lw a1, 4(a1) +; CHECK-RV32-NEXT: lui a3, %hi(i) +; CHECK-RV32-NEXT: lw a2, %lo(i)(a3) +; CHECK-RV32-NEXT: addi a3, a3, %lo(i) +; CHECK-RV32-NEXT: lw a3, 4(a3) +; CHECK-RV32-NEXT: call __adddf3 +; CHECK-RV32-NEXT: lui a2, %hi(g) +; CHECK-RV32-NEXT: addi a3, a2, %lo(g) +; CHECK-RV32-NEXT: sw a1, 4(a3) +; CHECK-RV32-NEXT: sw a0, %lo(g)(a2) +; CHECK-RV32-NEXT: lw t6, 12(sp) +; CHECK-RV32-NEXT: lw t5, 16(sp) +; CHECK-RV32-NEXT: lw t4, 20(sp) +; CHECK-RV32-NEXT: lw t3, 24(sp) +; CHECK-RV32-NEXT: lw a7, 28(sp) +; CHECK-RV32-NEXT: lw a6, 32(sp) +; CHECK-RV32-NEXT: lw a5, 36(sp) +; CHECK-RV32-NEXT: lw a4, 40(sp) +; CHECK-RV32-NEXT: lw a3, 44(sp) +; CHECK-RV32-NEXT: lw a2, 48(sp) +; CHECK-RV32-NEXT: lw a1, 52(sp) +; CHECK-RV32-NEXT: lw a0, 56(sp) +; CHECK-RV32-NEXT: lw s0, 60(sp) +; CHECK-RV32-NEXT: lw t2, 64(sp) +; CHECK-RV32-NEXT: lw t1, 68(sp) +; CHECK-RV32-NEXT: lw t0, 72(sp) +; CHECK-RV32-NEXT: lw ra, 76(sp) +; CHECK-RV32-NEXT: addi sp, sp, 80 +; CHECK-RV32-NEXT: mret +; +; CHECK-RV32-F-LABEL: foo_fp_double: +; CHECK-RV32-F: # %bb.0: +; CHECK-RV32-F-NEXT: addi sp, sp, -208 +; CHECK-RV32-F-NEXT: sw ra, 204(sp) +; CHECK-RV32-F-NEXT: sw t0, 200(sp) +; CHECK-RV32-F-NEXT: sw t1, 196(sp) +; CHECK-RV32-F-NEXT: sw t2, 192(sp) +; CHECK-RV32-F-NEXT: sw s0, 188(sp) +; CHECK-RV32-F-NEXT: sw a0, 184(sp) +; CHECK-RV32-F-NEXT: sw a1, 180(sp) +; CHECK-RV32-F-NEXT: sw a2, 176(sp) +; CHECK-RV32-F-NEXT: sw a3, 172(sp) +; CHECK-RV32-F-NEXT: sw a4, 168(sp) +; CHECK-RV32-F-NEXT: sw a5, 164(sp) +; CHECK-RV32-F-NEXT: sw a6, 160(sp) +; CHECK-RV32-F-NEXT: sw a7, 156(sp) +; CHECK-RV32-F-NEXT: sw t3, 152(sp) +; CHECK-RV32-F-NEXT: sw t4, 148(sp) +; CHECK-RV32-F-NEXT: sw t5, 144(sp) +; CHECK-RV32-F-NEXT: sw t6, 140(sp) +; CHECK-RV32-F-NEXT: fsw ft0, 136(sp) +; CHECK-RV32-F-NEXT: fsw ft1, 132(sp) +; CHECK-RV32-F-NEXT: fsw ft2, 128(sp) +; CHECK-RV32-F-NEXT: fsw ft3, 124(sp) +; CHECK-RV32-F-NEXT: fsw ft4, 120(sp) +; CHECK-RV32-F-NEXT: fsw ft5, 116(sp) +; CHECK-RV32-F-NEXT: fsw ft6, 112(sp) +; CHECK-RV32-F-NEXT: fsw ft7, 108(sp) +; CHECK-RV32-F-NEXT: fsw fa0, 104(sp) +; CHECK-RV32-F-NEXT: fsw fa1, 100(sp) +; CHECK-RV32-F-NEXT: fsw fa2, 96(sp) +; CHECK-RV32-F-NEXT: fsw fa3, 92(sp) +; CHECK-RV32-F-NEXT: fsw fa4, 88(sp) +; CHECK-RV32-F-NEXT: fsw fa5, 84(sp) +; CHECK-RV32-F-NEXT: fsw fa6, 80(sp) +; CHECK-RV32-F-NEXT: fsw fa7, 76(sp) +; CHECK-RV32-F-NEXT: fsw ft8, 72(sp) +; CHECK-RV32-F-NEXT: fsw ft9, 68(sp) +; CHECK-RV32-F-NEXT: fsw ft10, 64(sp) +; CHECK-RV32-F-NEXT: fsw ft11, 60(sp) +; CHECK-RV32-F-NEXT: fsw fs0, 56(sp) +; CHECK-RV32-F-NEXT: fsw fs1, 52(sp) +; CHECK-RV32-F-NEXT: fsw fs2, 48(sp) +; CHECK-RV32-F-NEXT: fsw fs3, 44(sp) +; CHECK-RV32-F-NEXT: fsw fs4, 40(sp) +; CHECK-RV32-F-NEXT: fsw fs5, 36(sp) +; CHECK-RV32-F-NEXT: fsw fs6, 32(sp) +; CHECK-RV32-F-NEXT: fsw fs7, 28(sp) +; CHECK-RV32-F-NEXT: fsw fs8, 24(sp) +; CHECK-RV32-F-NEXT: fsw fs9, 20(sp) +; CHECK-RV32-F-NEXT: fsw fs10, 16(sp) +; CHECK-RV32-F-NEXT: fsw fs11, 12(sp) +; CHECK-RV32-F-NEXT: addi s0, sp, 208 +; CHECK-RV32-F-NEXT: lui a1, %hi(h) +; CHECK-RV32-F-NEXT: lw a0, %lo(h)(a1) +; CHECK-RV32-F-NEXT: addi a1, a1, %lo(h) +; CHECK-RV32-F-NEXT: lw a1, 4(a1) +; CHECK-RV32-F-NEXT: lui a3, %hi(i) +; CHECK-RV32-F-NEXT: lw a2, %lo(i)(a3) +; CHECK-RV32-F-NEXT: addi a3, a3, %lo(i) +; CHECK-RV32-F-NEXT: lw a3, 4(a3) +; CHECK-RV32-F-NEXT: call __adddf3 +; CHECK-RV32-F-NEXT: lui a2, %hi(g) +; CHECK-RV32-F-NEXT: addi a3, a2, %lo(g) +; CHECK-RV32-F-NEXT: sw a1, 4(a3) +; CHECK-RV32-F-NEXT: sw a0, %lo(g)(a2) +; CHECK-RV32-F-NEXT: flw fs11, 12(sp) +; CHECK-RV32-F-NEXT: flw fs10, 16(sp) +; CHECK-RV32-F-NEXT: flw fs9, 20(sp) +; CHECK-RV32-F-NEXT: flw fs8, 24(sp) +; CHECK-RV32-F-NEXT: flw fs7, 28(sp) +; CHECK-RV32-F-NEXT: flw fs6, 32(sp) +; CHECK-RV32-F-NEXT: flw fs5, 36(sp) +; CHECK-RV32-F-NEXT: flw fs4, 40(sp) +; CHECK-RV32-F-NEXT: flw fs3, 44(sp) +; CHECK-RV32-F-NEXT: flw fs2, 48(sp) +; CHECK-RV32-F-NEXT: flw fs1, 52(sp) +; CHECK-RV32-F-NEXT: flw fs0, 56(sp) +; CHECK-RV32-F-NEXT: flw ft11, 60(sp) +; CHECK-RV32-F-NEXT: flw ft10, 64(sp) +; CHECK-RV32-F-NEXT: flw ft9, 68(sp) +; CHECK-RV32-F-NEXT: flw ft8, 72(sp) +; CHECK-RV32-F-NEXT: flw fa7, 76(sp) +; CHECK-RV32-F-NEXT: flw fa6, 80(sp) +; CHECK-RV32-F-NEXT: flw fa5, 84(sp) +; CHECK-RV32-F-NEXT: flw fa4, 88(sp) +; CHECK-RV32-F-NEXT: flw fa3, 92(sp) +; CHECK-RV32-F-NEXT: flw fa2, 96(sp) +; CHECK-RV32-F-NEXT: flw fa1, 100(sp) +; CHECK-RV32-F-NEXT: flw fa0, 104(sp) +; CHECK-RV32-F-NEXT: flw ft7, 108(sp) +; CHECK-RV32-F-NEXT: flw ft6, 112(sp) +; CHECK-RV32-F-NEXT: flw ft5, 116(sp) +; CHECK-RV32-F-NEXT: flw ft4, 120(sp) +; CHECK-RV32-F-NEXT: flw ft3, 124(sp) +; CHECK-RV32-F-NEXT: flw ft2, 128(sp) +; CHECK-RV32-F-NEXT: flw ft1, 132(sp) +; CHECK-RV32-F-NEXT: flw ft0, 136(sp) +; CHECK-RV32-F-NEXT: lw t6, 140(sp) +; CHECK-RV32-F-NEXT: lw t5, 144(sp) +; CHECK-RV32-F-NEXT: lw t4, 148(sp) +; CHECK-RV32-F-NEXT: lw t3, 152(sp) +; CHECK-RV32-F-NEXT: lw a7, 156(sp) +; CHECK-RV32-F-NEXT: lw a6, 160(sp) +; CHECK-RV32-F-NEXT: lw a5, 164(sp) +; CHECK-RV32-F-NEXT: lw a4, 168(sp) +; CHECK-RV32-F-NEXT: lw a3, 172(sp) +; CHECK-RV32-F-NEXT: lw a2, 176(sp) +; CHECK-RV32-F-NEXT: lw a1, 180(sp) +; CHECK-RV32-F-NEXT: lw a0, 184(sp) +; CHECK-RV32-F-NEXT: lw s0, 188(sp) +; CHECK-RV32-F-NEXT: lw t2, 192(sp) +; CHECK-RV32-F-NEXT: lw t1, 196(sp) +; CHECK-RV32-F-NEXT: lw t0, 200(sp) +; CHECK-RV32-F-NEXT: lw ra, 204(sp) +; CHECK-RV32-F-NEXT: addi sp, sp, 208 +; CHECK-RV32-F-NEXT: mret +; ; CHECK-RV32-FD-LABEL: foo_fp_double: ; CHECK-RV32-FD: # %bb.0: ; CHECK-RV32-FD-NEXT: addi sp, sp, -32 @@ -191,11 +738,11 @@ ; CHECK-RV32-FD-NEXT: fsd ft0, 8(sp) ; CHECK-RV32-FD-NEXT: fsd ft1, 0(sp) ; CHECK-RV32-FD-NEXT: addi s0, sp, 32 -; CHECK-RV32-FD-NEXT: lui a0, %hi(i) -; CHECK-RV32-FD-NEXT: fld ft0, %lo(i)(a0) ; CHECK-RV32-FD-NEXT: lui a0, %hi(h) -; CHECK-RV32-FD-NEXT: fld ft1, %lo(h)(a0) -; CHECK-RV32-FD-NEXT: fadd.d ft0, ft1, ft0 +; CHECK-RV32-FD-NEXT: fld ft0, %lo(h)(a0) +; CHECK-RV32-FD-NEXT: lui a0, %hi(i) +; CHECK-RV32-FD-NEXT: fld ft1, %lo(i)(a0) +; CHECK-RV32-FD-NEXT: fadd.d ft0, ft0, ft1 ; CHECK-RV32-FD-NEXT: lui a0, %hi(g) ; CHECK-RV32-FD-NEXT: fsd ft0, %lo(g)(a0) ; CHECK-RV32-FD-NEXT: fld ft1, 0(sp) @@ -205,7 +752,6 @@ ; CHECK-RV32-FD-NEXT: lw ra, 28(sp) ; CHECK-RV32-FD-NEXT: addi sp, sp, 32 ; CHECK-RV32-FD-NEXT: mret -; %1 = load double, double* @h %2 = load double, double* @i %add = fadd double %1, %2 diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll --- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll +++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll @@ -15,9 +15,9 @@ ; ; RV64-LABEL: test1: ; RV64: # %bb.0: # %entry +; RV64-NEXT: lw a1, 0(a1) ; RV64-NEXT: addi a2, zero, 1 ; RV64-NEXT: slli a2, a2, 31 -; RV64-NEXT: lw a1, 0(a1) ; RV64-NEXT: xor a1, a1, a2 ; RV64-NEXT: sw a1, 0(a0) ; RV64-NEXT: ret @@ -33,17 +33,17 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lui a3, 524288 +; RV32-NEXT: xor a2, a2, a3 ; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: xor a1, a2, a1 -; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a2, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test2: ; RV64: # %bb.0: # %entry +; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: addi a2, zero, -1 ; RV64-NEXT: slli a2, a2, 63 -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: xor a1, a1, a2 ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret @@ -57,27 +57,27 @@ define void @test3(fp128* %a, fp128* %b) nounwind { ; RV32-LABEL: test3: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a2, 12(a1) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a4, 0(a1) -; RV32-NEXT: lw a1, 8(a1) -; RV32-NEXT: sw a1, 8(a0) -; RV32-NEXT: sw a4, 0(a0) -; RV32-NEXT: sw a3, 4(a0) -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: xor a1, a2, a1 -; RV32-NEXT: sw a1, 12(a0) +; RV32-NEXT: lw a2, 4(a1) +; RV32-NEXT: lw a3, 12(a1) +; RV32-NEXT: lw a4, 8(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: xor a3, a3, a5 +; RV32-NEXT: sw a4, 8(a0) +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a2, 4(a0) +; RV32-NEXT: sw a3, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test3: ; RV64: # %bb.0: # %entry ; RV64-NEXT: ld a2, 8(a1) ; RV64-NEXT: ld a1, 0(a1) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: slli a3, a3, 63 +; RV64-NEXT: xor a2, a2, a3 ; RV64-NEXT: sd a1, 0(a0) -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: slli a1, a1, 63 -; RV64-NEXT: xor a1, a2, a1 -; RV64-NEXT: sd a1, 8(a0) +; RV64-NEXT: sd a2, 8(a0) ; RV64-NEXT: ret entry: %0 = load fp128, fp128* %b diff --git a/llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll b/llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll --- a/llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll +++ b/llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll @@ -11,21 +11,21 @@ define i32 @main() nounwind { ; RV32I-LABEL: main: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lui a0, %hi(b) -; RV32I-NEXT: addi a0, a0, %lo(b) -; RV32I-NEXT: lui a1, %hi(a) -; RV32I-NEXT: addi a1, a1, %lo(a) -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: mv a0, zero +; RV32I-NEXT: lui a1, %hi(b) +; RV32I-NEXT: addi a1, a1, %lo(b) +; RV32I-NEXT: lui a2, %hi(a) +; RV32I-NEXT: addi a2, a2, %lo(a) +; RV32I-NEXT: lui a3, 1 ; RV32I-NEXT: .LBB0_1: # %for.body ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: addi a4, a3, -2048 -; RV32I-NEXT: sw a4, 0(a1) +; RV32I-NEXT: addi a4, a0, -2048 +; RV32I-NEXT: sw a4, 0(a2) +; RV32I-NEXT: sw a0, 0(a1) +; RV32I-NEXT: addi a0, a0, 1 ; RV32I-NEXT: addi a1, a1, 4 -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: addi a0, a0, 4 -; RV32I-NEXT: addi a3, a3, 1 -; RV32I-NEXT: bne a3, a2, .LBB0_1 +; RV32I-NEXT: addi a2, a2, 4 +; RV32I-NEXT: bne a0, a3, .LBB0_1 ; RV32I-NEXT: # %bb.2: # %for.end ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -7,8 +7,9 @@ define i32 @lb(i8 *%a) nounwind { ; RV32I-LABEL: lb: ; RV32I: # %bb.0: -; RV32I-NEXT: lb a1, 0(a0) -; RV32I-NEXT: lb a0, 1(a0) +; RV32I-NEXT: lb a1, 1(a0) +; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = getelementptr i8, i8* %a, i32 1 %2 = load i8, i8* %1 @@ -21,8 +22,9 @@ define i32 @lh(i16 *%a) nounwind { ; RV32I-LABEL: lh: ; RV32I: # %bb.0: -; RV32I-NEXT: lh a1, 0(a0) -; RV32I-NEXT: lh a0, 4(a0) +; RV32I-NEXT: lh a1, 4(a0) +; RV32I-NEXT: lh a0, 0(a0) +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = getelementptr i16, i16* %a, i32 2 %2 = load i16, i16* %1 @@ -35,8 +37,9 @@ define i32 @lw(i32 *%a) nounwind { ; RV32I-LABEL: lw: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a1, 0(a0) -; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = getelementptr i32, i32* %a, i32 3 %2 = load i32, i32* %1 @@ -47,9 +50,9 @@ define i32 @lbu(i8 *%a) nounwind { ; RV32I-LABEL: lbu: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a1, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lbu a1, 4(a0) +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: ret %1 = getelementptr i8, i8* %a, i32 4 %2 = load i8, i8* %1 @@ -63,9 +66,9 @@ define i32 @lhu(i16 *%a) nounwind { ; RV32I-LABEL: lhu: ; RV32I: # %bb.0: -; RV32I-NEXT: lhu a1, 0(a0) -; RV32I-NEXT: lhu a0, 10(a0) -; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lhu a1, 10(a0) +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: ret %1 = getelementptr i16, i16* %a, i32 5 %2 = load i16, i16* %1 @@ -81,8 +84,8 @@ define void @sb(i8 *%a, i8 %b) nounwind { ; RV32I-LABEL: sb: ; RV32I: # %bb.0: -; RV32I-NEXT: sb a1, 6(a0) ; RV32I-NEXT: sb a1, 0(a0) +; RV32I-NEXT: sb a1, 6(a0) ; RV32I-NEXT: ret store i8 %b, i8* %a %1 = getelementptr i8, i8* %a, i32 6 @@ -93,8 +96,8 @@ define void @sh(i16 *%a, i16 %b) nounwind { ; RV32I-LABEL: sh: ; RV32I: # %bb.0: -; RV32I-NEXT: sh a1, 14(a0) ; RV32I-NEXT: sh a1, 0(a0) +; RV32I-NEXT: sh a1, 14(a0) ; RV32I-NEXT: ret store i16 %b, i16* %a %1 = getelementptr i16, i16* %a, i32 7 @@ -105,8 +108,8 @@ define void @sw(i32 *%a, i32 %b) nounwind { ; RV32I-LABEL: sw: ; RV32I: # %bb.0: -; RV32I-NEXT: sw a1, 32(a0) ; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a1, 32(a0) ; RV32I-NEXT: ret store i32 %b, i32* %a %1 = getelementptr i32, i32* %a, i32 8 @@ -118,10 +121,10 @@ define i32 @load_sext_zext_anyext_i1(i1 *%a) nounwind { ; RV32I-LABEL: load_sext_zext_anyext_i1: ; RV32I: # %bb.0: -; RV32I-NEXT: lb a1, 0(a0) ; RV32I-NEXT: lbu a1, 1(a0) -; RV32I-NEXT: lbu a0, 2(a0) -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lbu a2, 2(a0) +; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: sub a0, a2, a1 ; RV32I-NEXT: ret ; sextload i1 %1 = getelementptr i1, i1* %a, i32 1 @@ -140,10 +143,10 @@ define i16 @load_sext_zext_anyext_i1_i16(i1 *%a) nounwind { ; RV32I-LABEL: load_sext_zext_anyext_i1_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: lb a1, 0(a0) ; RV32I-NEXT: lbu a1, 1(a0) -; RV32I-NEXT: lbu a0, 2(a0) -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lbu a2, 2(a0) +; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: sub a0, a2, a1 ; RV32I-NEXT: ret ; sextload i1 %1 = getelementptr i1, i1* %a, i32 1 diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll --- a/llvm/test/CodeGen/RISCV/mem64.ll +++ b/llvm/test/CodeGen/RISCV/mem64.ll @@ -7,8 +7,9 @@ define i64 @lb(i8 *%a) nounwind { ; RV64I-LABEL: lb: ; RV64I: # %bb.0: -; RV64I-NEXT: lb a1, 0(a0) -; RV64I-NEXT: lb a0, 1(a0) +; RV64I-NEXT: lb a1, 1(a0) +; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = getelementptr i8, i8* %a, i32 1 %2 = load i8, i8* %1 @@ -21,8 +22,9 @@ define i64 @lh(i16 *%a) nounwind { ; RV64I-LABEL: lh: ; RV64I: # %bb.0: -; RV64I-NEXT: lh a1, 0(a0) -; RV64I-NEXT: lh a0, 4(a0) +; RV64I-NEXT: lh a1, 4(a0) +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = getelementptr i16, i16* %a, i32 2 %2 = load i16, i16* %1 @@ -35,8 +37,9 @@ define i64 @lw(i32 *%a) nounwind { ; RV64I-LABEL: lw: ; RV64I: # %bb.0: -; RV64I-NEXT: lw a1, 0(a0) -; RV64I-NEXT: lw a0, 12(a0) +; RV64I-NEXT: lw a1, 12(a0) +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = getelementptr i32, i32* %a, i32 3 %2 = load i32, i32* %1 @@ -49,9 +52,9 @@ define i64 @lbu(i8 *%a) nounwind { ; RV64I-LABEL: lbu: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a1, 0(a0) -; RV64I-NEXT: lbu a0, 4(a0) -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lbu a1, 4(a0) +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %1 = getelementptr i8, i8* %a, i32 4 %2 = load i8, i8* %1 @@ -65,9 +68,9 @@ define i64 @lhu(i16 *%a) nounwind { ; RV64I-LABEL: lhu: ; RV64I: # %bb.0: -; RV64I-NEXT: lhu a1, 0(a0) -; RV64I-NEXT: lhu a0, 10(a0) -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lhu a1, 10(a0) +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %1 = getelementptr i16, i16* %a, i32 5 %2 = load i16, i16* %1 @@ -81,9 +84,9 @@ define i64 @lwu(i32 *%a) nounwind { ; RV64I-LABEL: lwu: ; RV64I: # %bb.0: -; RV64I-NEXT: lwu a1, 0(a0) -; RV64I-NEXT: lwu a0, 24(a0) -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lwu a1, 24(a0) +; RV64I-NEXT: lwu a0, 0(a0) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %1 = getelementptr i32, i32* %a, i32 6 %2 = load i32, i32* %1 @@ -99,8 +102,8 @@ define void @sb(i8 *%a, i8 %b) nounwind { ; RV64I-LABEL: sb: ; RV64I: # %bb.0: -; RV64I-NEXT: sb a1, 7(a0) ; RV64I-NEXT: sb a1, 0(a0) +; RV64I-NEXT: sb a1, 7(a0) ; RV64I-NEXT: ret store i8 %b, i8* %a %1 = getelementptr i8, i8* %a, i32 7 @@ -111,8 +114,8 @@ define void @sh(i16 *%a, i16 %b) nounwind { ; RV64I-LABEL: sh: ; RV64I: # %bb.0: -; RV64I-NEXT: sh a1, 16(a0) ; RV64I-NEXT: sh a1, 0(a0) +; RV64I-NEXT: sh a1, 16(a0) ; RV64I-NEXT: ret store i16 %b, i16* %a %1 = getelementptr i16, i16* %a, i32 8 @@ -123,8 +126,8 @@ define void @sw(i32 *%a, i32 %b) nounwind { ; RV64I-LABEL: sw: ; RV64I: # %bb.0: -; RV64I-NEXT: sw a1, 36(a0) ; RV64I-NEXT: sw a1, 0(a0) +; RV64I-NEXT: sw a1, 36(a0) ; RV64I-NEXT: ret store i32 %b, i32* %a %1 = getelementptr i32, i32* %a, i32 9 @@ -137,8 +140,9 @@ define i64 @ld(i64 *%a) nounwind { ; RV64I-LABEL: ld: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a1, 0(a0) -; RV64I-NEXT: ld a0, 80(a0) +; RV64I-NEXT: ld a1, 80(a0) +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = getelementptr i64, i64* %a, i32 10 %2 = load i64, i64* %1 @@ -149,8 +153,8 @@ define void @sd(i64 *%a, i64 %b) nounwind { ; RV64I-LABEL: sd: ; RV64I: # %bb.0: -; RV64I-NEXT: sd a1, 88(a0) ; RV64I-NEXT: sd a1, 0(a0) +; RV64I-NEXT: sd a1, 88(a0) ; RV64I-NEXT: ret store i64 %b, i64* %a %1 = getelementptr i64, i64* %a, i32 11 @@ -162,10 +166,10 @@ define i64 @load_sext_zext_anyext_i1(i1 *%a) nounwind { ; RV64I-LABEL: load_sext_zext_anyext_i1: ; RV64I: # %bb.0: -; RV64I-NEXT: lb a1, 0(a0) ; RV64I-NEXT: lbu a1, 1(a0) -; RV64I-NEXT: lbu a0, 2(a0) -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lbu a2, 2(a0) +; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: sub a0, a2, a1 ; RV64I-NEXT: ret ; sextload i1 %1 = getelementptr i1, i1* %a, i32 1 @@ -184,10 +188,10 @@ define i16 @load_sext_zext_anyext_i1_i16(i1 *%a) nounwind { ; RV64I-LABEL: load_sext_zext_anyext_i1_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: lb a1, 0(a0) ; RV64I-NEXT: lbu a1, 1(a0) -; RV64I-NEXT: lbu a0, 2(a0) -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lbu a2, 2(a0) +; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: sub a0, a2, a1 ; RV64I-NEXT: ret ; sextload i1 %1 = getelementptr i1, i1* %a, i32 1 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -247,8 +247,8 @@ ; ; RV64IM-LABEL: mulhs: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a1, a1 ; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: sext.w a1, a1 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll --- a/llvm/test/CodeGen/RISCV/remat.ll +++ b/llvm/test/CodeGen/RISCV/remat.ll @@ -37,19 +37,19 @@ ; RV32I-NEXT: sw s9, 20(sp) ; RV32I-NEXT: sw s10, 16(sp) ; RV32I-NEXT: sw s11, 12(sp) -; RV32I-NEXT: lui s9, %hi(a) -; RV32I-NEXT: lw a0, %lo(a)(s9) +; RV32I-NEXT: lui s6, %hi(a) +; RV32I-NEXT: lw a0, %lo(a)(s6) ; RV32I-NEXT: beqz a0, .LBB0_11 ; RV32I-NEXT: # %bb.1: # %for.body.preheader ; RV32I-NEXT: lui s2, %hi(l) ; RV32I-NEXT: lui s3, %hi(k) ; RV32I-NEXT: lui s4, %hi(j) -; RV32I-NEXT: lui s6, %hi(i) -; RV32I-NEXT: lui s5, %hi(h) -; RV32I-NEXT: lui s7, %hi(g) -; RV32I-NEXT: lui s8, %hi(f) -; RV32I-NEXT: lui s1, %hi(e) -; RV32I-NEXT: lui s0, %hi(d) +; RV32I-NEXT: lui s5, %hi(i) +; RV32I-NEXT: lui s1, %hi(d) +; RV32I-NEXT: lui s0, %hi(e) +; RV32I-NEXT: lui s7, %hi(f) +; RV32I-NEXT: lui s8, %hi(g) +; RV32I-NEXT: lui s9, %hi(h) ; RV32I-NEXT: lui s10, %hi(c) ; RV32I-NEXT: lui s11, %hi(b) ; RV32I-NEXT: .LBB0_2: # %for.body @@ -58,10 +58,10 @@ ; RV32I-NEXT: beqz a1, .LBB0_4 ; RV32I-NEXT: # %bb.3: # %if.then ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a4, %lo(e)(s1) -; RV32I-NEXT: lw a3, %lo(d)(s0) -; RV32I-NEXT: lw a2, %lo(c)(s10) ; RV32I-NEXT: lw a1, %lo(b)(s11) +; RV32I-NEXT: lw a2, %lo(c)(s10) +; RV32I-NEXT: lw a3, %lo(d)(s1) +; RV32I-NEXT: lw a4, %lo(e)(s0) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo ; RV32I-NEXT: .LBB0_4: # %if.end @@ -70,11 +70,11 @@ ; RV32I-NEXT: beqz a0, .LBB0_6 ; RV32I-NEXT: # %bb.5: # %if.then3 ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a4, %lo(f)(s8) -; RV32I-NEXT: lw a3, %lo(e)(s1) -; RV32I-NEXT: lw a2, %lo(d)(s0) -; RV32I-NEXT: lw a1, %lo(c)(s10) ; RV32I-NEXT: lw a0, %lo(b)(s11) +; RV32I-NEXT: lw a1, %lo(c)(s10) +; RV32I-NEXT: lw a2, %lo(d)(s1) +; RV32I-NEXT: lw a3, %lo(e)(s0) +; RV32I-NEXT: lw a4, %lo(f)(s7) ; RV32I-NEXT: addi a5, zero, 64 ; RV32I-NEXT: call foo ; RV32I-NEXT: .LBB0_6: # %if.end5 @@ -83,31 +83,31 @@ ; RV32I-NEXT: beqz a0, .LBB0_8 ; RV32I-NEXT: # %bb.7: # %if.then7 ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a4, %lo(g)(s7) -; RV32I-NEXT: lw a3, %lo(f)(s8) -; RV32I-NEXT: lw a2, %lo(e)(s1) -; RV32I-NEXT: lw a1, %lo(d)(s0) ; RV32I-NEXT: lw a0, %lo(c)(s10) +; RV32I-NEXT: lw a1, %lo(d)(s1) +; RV32I-NEXT: lw a2, %lo(e)(s0) +; RV32I-NEXT: lw a3, %lo(f)(s7) +; RV32I-NEXT: lw a4, %lo(g)(s8) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo ; RV32I-NEXT: .LBB0_8: # %if.end9 ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a0, %lo(i)(s6) +; RV32I-NEXT: lw a0, %lo(i)(s5) ; RV32I-NEXT: beqz a0, .LBB0_10 ; RV32I-NEXT: # %bb.9: # %if.then11 ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a4, %lo(h)(s5) -; RV32I-NEXT: lw a3, %lo(g)(s7) -; RV32I-NEXT: lw a2, %lo(f)(s8) -; RV32I-NEXT: lw a1, %lo(e)(s1) -; RV32I-NEXT: lw a0, %lo(d)(s0) +; RV32I-NEXT: lw a0, %lo(d)(s1) +; RV32I-NEXT: lw a1, %lo(e)(s0) +; RV32I-NEXT: lw a2, %lo(f)(s7) +; RV32I-NEXT: lw a3, %lo(g)(s8) +; RV32I-NEXT: lw a4, %lo(h)(s9) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo ; RV32I-NEXT: .LBB0_10: # %for.inc ; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a0, %lo(a)(s9) +; RV32I-NEXT: lw a0, %lo(a)(s6) ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: sw a0, %lo(a)(s9) +; RV32I-NEXT: sw a0, %lo(a)(s6) ; RV32I-NEXT: bnez a0, .LBB0_2 ; RV32I-NEXT: .LBB0_11: # %for.end ; RV32I-NEXT: addi a0, zero, 1 diff --git a/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll b/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll --- a/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll @@ -113,9 +113,9 @@ define float @bcvt_i64_to_f32_via_i32(i64 %a, i64 %b) nounwind { ; RV64IF-LABEL: bcvt_i64_to_f32_via_i32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a1 -; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: fadd.s ft0, ft1, ft0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 ; RV64IF-NEXT: fmv.x.w a0, ft0 ; RV64IF-NEXT: ret %1 = trunc i64 %a to i32 diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll --- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll @@ -13,16 +13,16 @@ ; CHECK-NEXT: sd s0, 16(sp) ; CHECK-NEXT: sd s1, 8(sp) ; CHECK-NEXT: sd s2, 0(sp) -; CHECK-NEXT: mv s0, a1 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: srli s2, a0, 32 +; CHECK-NEXT: srli s1, a1, 32 ; CHECK-NEXT: call __addsf3 -; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: srli a0, s1, 32 -; CHECK-NEXT: srli a1, s0, 32 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __addsf3 -; CHECK-NEXT: slli a1, s2, 32 -; CHECK-NEXT: srli a1, a1, 32 ; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: slli a1, s0, 32 +; CHECK-NEXT: srli a1, a1, 32 ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: ld s2, 0(sp) ; CHECK-NEXT: ld s1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll --- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll @@ -8,17 +8,17 @@ ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: not a2, a0 ; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: addi a3, a0, 1 +; CHECK-NEXT: mul a3, a2, a3 +; CHECK-NEXT: slli a2, a2, 32 +; CHECK-NEXT: srli a2, a2, 32 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -2 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: srli a1, a1, 32 -; CHECK-NEXT: slli a3, a2, 32 -; CHECK-NEXT: srli a3, a3, 32 -; CHECK-NEXT: mul a1, a3, a1 -; CHECK-NEXT: addi a3, a0, 1 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: mul a1, a2, a1 ; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: add a0, a3, a0 ; CHECK-NEXT: addw a0, a0, a1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: @@ -54,18 +54,18 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: bge a0, a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sub a2, a1, a0 -; CHECK-NEXT: addi a2, a2, -2 -; CHECK-NEXT: slli a2, a2, 32 -; CHECK-NEXT: srli a2, a2, 32 -; CHECK-NEXT: not a3, a0 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: slli a4, a1, 32 -; CHECK-NEXT: srli a4, a4, 32 -; CHECK-NEXT: mul a2, a4, a2 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub a0, a1, a0 -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: not a2, a0 +; CHECK-NEXT: add a3, a2, a1 +; CHECK-NEXT: mul a2, a3, a2 +; CHECK-NEXT: slli a3, a3, 32 +; CHECK-NEXT: srli a3, a3, 32 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: addi a1, a1, -2 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: mul a1, a3, a1 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: sub a0, a2, a0 ; CHECK-NEXT: subw a0, a0, a1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll @@ -40,27 +40,41 @@ ; RV32I-LABEL: cmovcc128: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: xori a1, a1, 123 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: beqz a1, .LBB1_2 +; RV32I-NEXT: or a2, a1, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: beqz a2, .LBB1_2 ; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: .LBB1_2: # %entry +; RV32I-NEXT: lw a6, 0(a1) +; RV32I-NEXT: beqz a2, .LBB1_6 +; RV32I-NEXT: # %bb.3: # %entry ; RV32I-NEXT: addi a1, a4, 4 -; RV32I-NEXT: addi a2, a4, 8 -; RV32I-NEXT: addi a5, a4, 12 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: j .LBB1_3 -; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: bnez a2, .LBB1_7 +; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: addi a1, a3, 8 +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: bnez a2, .LBB1_8 +; RV32I-NEXT: .LBB1_5: +; RV32I-NEXT: addi a2, a3, 12 +; RV32I-NEXT: j .LBB1_9 +; RV32I-NEXT: .LBB1_6: ; RV32I-NEXT: addi a1, a3, 4 -; RV32I-NEXT: addi a2, a3, 8 -; RV32I-NEXT: addi a5, a3, 12 -; RV32I-NEXT: .LBB1_3: # %entry -; RV32I-NEXT: lw a4, 0(a5) -; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: beqz a2, .LBB1_4 +; RV32I-NEXT: .LBB1_7: # %entry +; RV32I-NEXT: addi a1, a4, 8 ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: beqz a2, .LBB1_5 +; RV32I-NEXT: .LBB1_8: # %entry +; RV32I-NEXT: addi a2, a4, 12 +; RV32I-NEXT: .LBB1_9: # %entry +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a6, 0(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: cmovcc128: @@ -83,24 +97,24 @@ define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: cmov64: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: bnez a0, .LBB2_2 +; RV32I-NEXT: andi a5, a0, 1 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a5, .LBB2_2 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: .LBB2_2: # %entry -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cmov64: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: bnez a0, .LBB2_2 +; RV64I-NEXT: andi a3, a0, 1 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: .LBB2_2: # %entry -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret entry: %cond = select i1 %a, i64 %b, i64 %c @@ -110,38 +124,52 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; RV32I-LABEL: cmov128: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a1, a1, 1 -; RV32I-NEXT: bnez a1, .LBB3_2 +; RV32I-NEXT: andi a4, a1, 1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: bnez a4, .LBB3_2 ; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB3_2: # %entry +; RV32I-NEXT: lw a6, 0(a1) +; RV32I-NEXT: bnez a4, .LBB3_6 +; RV32I-NEXT: # %bb.3: # %entry ; RV32I-NEXT: addi a1, a3, 4 -; RV32I-NEXT: addi a4, a3, 8 -; RV32I-NEXT: addi a5, a3, 12 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: j .LBB3_3 -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: beqz a4, .LBB3_7 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: addi a1, a2, 8 +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: beqz a4, .LBB3_8 +; RV32I-NEXT: .LBB3_5: +; RV32I-NEXT: addi a2, a2, 12 +; RV32I-NEXT: j .LBB3_9 +; RV32I-NEXT: .LBB3_6: ; RV32I-NEXT: addi a1, a2, 4 -; RV32I-NEXT: addi a4, a2, 8 -; RV32I-NEXT: addi a5, a2, 12 -; RV32I-NEXT: .LBB3_3: # %entry -; RV32I-NEXT: lw a3, 0(a5) -; RV32I-NEXT: sw a3, 12(a0) -; RV32I-NEXT: lw a3, 0(a4) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: bnez a4, .LBB3_4 +; RV32I-NEXT: .LBB3_7: # %entry +; RV32I-NEXT: addi a1, a3, 8 ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: bnez a4, .LBB3_5 +; RV32I-NEXT: .LBB3_8: # %entry +; RV32I-NEXT: addi a2, a3, 12 +; RV32I-NEXT: .LBB3_9: # %entry +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a6, 0(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: cmov128: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: bnez a0, .LBB3_2 +; RV64I-NEXT: andi a5, a0, 1 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a5, .LBB3_2 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: mv a2, a4 ; RV64I-NEXT: .LBB3_2: # %entry -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: ret entry: @@ -280,39 +308,37 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV32I-LABEL: cmovdiffcc: ; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: andi a1, a1, 1 -; RV32I-NEXT: beqz a1, .LBB7_3 +; RV32I-NEXT: beqz a0, .LBB7_3 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: beqz a0, .LBB7_4 +; RV32I-NEXT: beqz a1, .LBB7_4 ; RV32I-NEXT: .LBB7_2: # %entry ; RV32I-NEXT: add a0, a2, a4 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB7_3: # %entry -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: bnez a0, .LBB7_2 -; RV32I-NEXT: .LBB7_4: # %entry ; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: bnez a1, .LBB7_2 +; RV32I-NEXT: .LBB7_4: # %entry +; RV32I-NEXT: mv a4, a5 ; RV32I-NEXT: add a0, a2, a4 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cmovdiffcc: ; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: andi a1, a1, 1 -; RV64I-NEXT: beqz a1, .LBB7_3 +; RV64I-NEXT: beqz a0, .LBB7_3 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: beqz a0, .LBB7_4 +; RV64I-NEXT: beqz a1, .LBB7_4 ; RV64I-NEXT: .LBB7_2: # %entry ; RV64I-NEXT: addw a0, a2, a4 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB7_3: # %entry -; RV64I-NEXT: mv a4, a5 -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: bnez a0, .LBB7_2 -; RV64I-NEXT: .LBB7_4: # %entry ; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: bnez a1, .LBB7_2 +; RV64I-NEXT: .LBB7_4: # %entry +; RV64I-NEXT: mv a4, a5 ; RV64I-NEXT: addw a0, a2, a4 ; RV64I-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll --- a/llvm/test/CodeGen/RISCV/setcc-logic.ll +++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll @@ -7,17 +7,17 @@ define i1 @and_icmp_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; RV32I-LABEL: and_icmp_eq: ; RV32I: # %bb.0: -; RV32I-NEXT: xor a2, a2, a3 ; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: xor a1, a2, a3 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: and_icmp_eq: ; RV64I: # %bb.0: -; RV64I-NEXT: xor a2, a2, a3 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: xor a1, a2, a3 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: seqz a0, a0 @@ -31,17 +31,17 @@ define i1 @or_icmp_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; RV32I-LABEL: or_icmp_ne: ; RV32I: # %bb.0: -; RV32I-NEXT: xor a2, a2, a3 ; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: xor a1, a2, a3 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: or_icmp_ne: ; RV64I: # %bb.0: -; RV64I-NEXT: xor a2, a2, a3 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: xor a1, a2, a3 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: snez a0, a0 @@ -102,22 +102,22 @@ define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind { ; RV32I-LABEL: and_icmps_const_not1bit_diff: ; RV32I: # %bb.0: -; RV32I-NEXT: xori a1, a0, 92 +; RV32I-NEXT: xori a1, a0, 44 ; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: xori a0, a0, 44 +; RV32I-NEXT: xori a0, a0, 92 ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: and_icmps_const_not1bit_diff: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 -; RV64I-NEXT: xori a1, a0, 92 +; RV64I-NEXT: xori a1, a0, 44 ; RV64I-NEXT: snez a1, a1 -; RV64I-NEXT: xori a0, a0, 44 +; RV64I-NEXT: xori a0, a0, 92 ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ret %a = icmp ne i32 %x, 44 %b = icmp ne i32 %x, 92 diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -17,13 +17,14 @@ ; RV32I-NEXT: mv a1, zero ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: srl a2, a1, a2 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr64: @@ -62,11 +63,11 @@ ; RV32I-NEXT: srai a1, a1, 31 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: sra a1, a1, a2 ; RV32I-NEXT: ret @@ -107,13 +108,14 @@ ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: addi a3, zero, 31 ; RV32I-NEXT: sub a3, a3, a2 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 -; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: sll a2, a0, a2 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl64: @@ -148,27 +150,27 @@ ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) ; RV32I-NEXT: sw s0, 40(sp) -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a0, 12(a1) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lw a0, 8(a1) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lw a0, 0(a1) -; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: sw a3, 8(sp) ; RV32I-NEXT: call __lshrti3 ; RV32I-NEXT: lw a0, 36(sp) +; RV32I-NEXT: lw a1, 32(sp) +; RV32I-NEXT: lw a2, 28(sp) +; RV32I-NEXT: lw a3, 24(sp) ; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: lw a0, 32(sp) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: lw a0, 28(sp) -; RV32I-NEXT: sw a0, 4(s0) -; RV32I-NEXT: lw a0, 24(sp) -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 8(s0) +; RV32I-NEXT: sw a2, 4(s0) +; RV32I-NEXT: sw a3, 0(s0) ; RV32I-NEXT: lw s0, 40(sp) ; RV32I-NEXT: lw ra, 44(sp) ; RV32I-NEXT: addi sp, sp, 48 @@ -183,13 +185,14 @@ ; RV64I-NEXT: mv a1, zero ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: srl a0, a0, a2 ; RV64I-NEXT: addi a3, zero, 63 ; RV64I-NEXT: sub a3, a3, a2 ; RV64I-NEXT: slli a4, a1, 1 ; RV64I-NEXT: sll a3, a4, a3 -; RV64I-NEXT: srl a0, a0, a2 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: srl a2, a1, a2 +; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: ret %1 = lshr i128 %a, %b ret i128 %1 @@ -201,27 +204,27 @@ ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) ; RV32I-NEXT: sw s0, 40(sp) -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a0, 12(a1) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lw a0, 8(a1) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lw a0, 0(a1) -; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: sw a3, 8(sp) ; RV32I-NEXT: call __ashrti3 ; RV32I-NEXT: lw a0, 36(sp) +; RV32I-NEXT: lw a1, 32(sp) +; RV32I-NEXT: lw a2, 28(sp) +; RV32I-NEXT: lw a3, 24(sp) ; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: lw a0, 32(sp) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: lw a0, 28(sp) -; RV32I-NEXT: sw a0, 4(s0) -; RV32I-NEXT: lw a0, 24(sp) -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 8(s0) +; RV32I-NEXT: sw a2, 4(s0) +; RV32I-NEXT: sw a3, 0(s0) ; RV32I-NEXT: lw s0, 40(sp) ; RV32I-NEXT: lw ra, 44(sp) ; RV32I-NEXT: addi sp, sp, 48 @@ -236,11 +239,11 @@ ; RV64I-NEXT: srai a1, a1, 63 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: srl a0, a0, a2 ; RV64I-NEXT: addi a3, zero, 63 ; RV64I-NEXT: sub a3, a3, a2 ; RV64I-NEXT: slli a4, a1, 1 ; RV64I-NEXT: sll a3, a4, a3 -; RV64I-NEXT: srl a0, a0, a2 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sra a1, a1, a2 ; RV64I-NEXT: ret @@ -254,27 +257,27 @@ ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) ; RV32I-NEXT: sw s0, 40(sp) -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a0, 12(a1) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lw a0, 8(a1) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lw a0, 0(a1) -; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: sw a3, 8(sp) ; RV32I-NEXT: call __ashlti3 ; RV32I-NEXT: lw a0, 36(sp) +; RV32I-NEXT: lw a1, 32(sp) +; RV32I-NEXT: lw a2, 28(sp) +; RV32I-NEXT: lw a3, 24(sp) ; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: lw a0, 32(sp) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: lw a0, 28(sp) -; RV32I-NEXT: sw a0, 4(s0) -; RV32I-NEXT: lw a0, 24(sp) -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 8(s0) +; RV32I-NEXT: sw a2, 4(s0) +; RV32I-NEXT: sw a3, 0(s0) ; RV32I-NEXT: lw s0, 40(sp) ; RV32I-NEXT: lw ra, 44(sp) ; RV32I-NEXT: addi sp, sp, 48 @@ -289,13 +292,14 @@ ; RV64I-NEXT: mv a0, zero ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: sll a1, a1, a2 ; RV64I-NEXT: addi a3, zero, 63 ; RV64I-NEXT: sub a3, a3, a2 ; RV64I-NEXT: srli a4, a0, 1 ; RV64I-NEXT: srl a3, a4, a3 -; RV64I-NEXT: sll a1, a1, a2 ; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: sll a2, a0, a2 +; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ret %1 = shl i128 %a, %b ret i128 %1 diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -11,33 +11,33 @@ define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) { ; RV32I-LABEL: test1: ; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: lui a2, 20 ; RV32I-NEXT: addi a2, a2, -1920 -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: addi a2, zero, 2 +; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: addi a3, zero, 1 ; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: addi a4, zero, 2 -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: add a0, a1, a2 -; RV32I-NEXT: sw a4, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a3, 0(a1) +; RV32I-NEXT: sw a2, 4(a1) ; RV32I-NEXT: .cfi_def_cfa_offset 0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test1: ; RV64I: # %bb.0: # %entry +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: lui a2, 20 ; RV64I-NEXT: addiw a2, a2, -1920 -; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 2 +; RV64I-NEXT: sw a2, 0(a0) ; RV64I-NEXT: addi a3, zero, 1 ; RV64I-NEXT: sw a3, 4(a0) -; RV64I-NEXT: addi a4, zero, 2 -; RV64I-NEXT: sw a4, 0(a0) -; RV64I-NEXT: add a0, a1, a2 -; RV64I-NEXT: sw a4, 4(a0) -; RV64I-NEXT: sw a3, 0(a0) +; RV64I-NEXT: sw a3, 0(a1) +; RV64I-NEXT: sw a2, 4(a1) ; RV64I-NEXT: .cfi_def_cfa_offset 0 ; RV64I-NEXT: ret entry: @@ -57,20 +57,20 @@ define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) { ; RV32I-LABEL: test2: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lui a3, 20 -; RV32I-NEXT: addi a3, a3, -1920 -; RV32I-NEXT: lw a0, 0(a0) -; RV32I-NEXT: add a0, a0, a3 -; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lui a0, 20 +; RV32I-NEXT: addi a5, a0, -1920 +; RV32I-NEXT: add a0, a1, a5 +; RV32I-NEXT: add a1, a4, a5 ; RV32I-NEXT: bge a3, a2, .LBB1_2 ; RV32I-NEXT: .LBB1_1: # %while_body ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: addi a4, a3, 1 -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a1) ; RV32I-NEXT: sw a4, 0(a1) +; RV32I-NEXT: sw a3, 4(a1) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: blt a3, a2, .LBB1_1 ; RV32I-NEXT: .LBB1_2: # %while_end @@ -79,22 +79,22 @@ ; ; RV64I-LABEL: test2: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: lui a3, 20 -; RV64I-NEXT: addiw a3, a3, -1920 -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: add a0, a0, a3 -; RV64I-NEXT: add a1, a1, a3 -; RV64I-NEXT: sext.w a2, a2 ; RV64I-NEXT: mv a3, zero +; RV64I-NEXT: ld a4, 0(a0) +; RV64I-NEXT: lui a0, 20 +; RV64I-NEXT: addiw a5, a0, -1920 +; RV64I-NEXT: add a0, a1, a5 +; RV64I-NEXT: add a1, a4, a5 +; RV64I-NEXT: sext.w a2, a2 ; RV64I-NEXT: sext.w a4, a3 ; RV64I-NEXT: bge a4, a2, .LBB1_2 ; RV64I-NEXT: .LBB1_1: # %while_body ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sw a3, 4(a0) ; RV64I-NEXT: addi a4, a3, 1 -; RV64I-NEXT: sw a4, 0(a0) -; RV64I-NEXT: sw a3, 4(a1) ; RV64I-NEXT: sw a4, 0(a1) +; RV64I-NEXT: sw a3, 4(a1) +; RV64I-NEXT: sw a4, 0(a0) +; RV64I-NEXT: sw a3, 4(a0) ; RV64I-NEXT: mv a3, a4 ; RV64I-NEXT: sext.w a4, a3 ; RV64I-NEXT: blt a4, a2, .LBB1_1 diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -4,111 +4,113 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-LABEL: muloti_test: ; RISCV32: # %bb.0: # %start -; RISCV32-NEXT: addi sp, sp, -80 -; RISCV32-NEXT: sw ra, 76(sp) -; RISCV32-NEXT: sw s0, 72(sp) -; RISCV32-NEXT: sw s1, 68(sp) -; RISCV32-NEXT: sw s2, 64(sp) -; RISCV32-NEXT: sw s3, 60(sp) -; RISCV32-NEXT: sw s4, 56(sp) -; RISCV32-NEXT: sw s5, 52(sp) -; RISCV32-NEXT: sw s6, 48(sp) -; RISCV32-NEXT: mv s1, a2 -; RISCV32-NEXT: mv s0, a1 -; RISCV32-NEXT: mv s2, a0 -; RISCV32-NEXT: sw zero, 12(sp) -; RISCV32-NEXT: sw zero, 8(sp) -; RISCV32-NEXT: sw zero, 28(sp) -; RISCV32-NEXT: sw zero, 24(sp) -; RISCV32-NEXT: lw s3, 4(a2) -; RISCV32-NEXT: sw s3, 4(sp) -; RISCV32-NEXT: lw s5, 0(a2) -; RISCV32-NEXT: sw s5, 0(sp) -; RISCV32-NEXT: lw s4, 4(a1) -; RISCV32-NEXT: sw s4, 20(sp) -; RISCV32-NEXT: lw s6, 0(a1) -; RISCV32-NEXT: sw s6, 16(sp) -; RISCV32-NEXT: addi a0, sp, 32 -; RISCV32-NEXT: addi a1, sp, 16 -; RISCV32-NEXT: mv a2, sp +; RISCV32-NEXT: addi sp, sp, -96 +; RISCV32-NEXT: sw ra, 92(sp) +; RISCV32-NEXT: sw s0, 88(sp) +; RISCV32-NEXT: sw s1, 84(sp) +; RISCV32-NEXT: sw s2, 80(sp) +; RISCV32-NEXT: sw s3, 76(sp) +; RISCV32-NEXT: sw s4, 72(sp) +; RISCV32-NEXT: sw s5, 68(sp) +; RISCV32-NEXT: sw s6, 64(sp) +; RISCV32-NEXT: sw s7, 60(sp) +; RISCV32-NEXT: sw s8, 56(sp) +; RISCV32-NEXT: lw s2, 12(a1) +; RISCV32-NEXT: lw s6, 8(a1) +; RISCV32-NEXT: lw s3, 12(a2) +; RISCV32-NEXT: lw s7, 8(a2) +; RISCV32-NEXT: lw s0, 0(a1) +; RISCV32-NEXT: lw s8, 4(a1) +; RISCV32-NEXT: lw s1, 0(a2) +; RISCV32-NEXT: lw s5, 4(a2) +; RISCV32-NEXT: mv s4, a0 +; RISCV32-NEXT: sw zero, 20(sp) +; RISCV32-NEXT: sw zero, 16(sp) +; RISCV32-NEXT: sw zero, 36(sp) +; RISCV32-NEXT: sw zero, 32(sp) +; RISCV32-NEXT: sw s5, 12(sp) +; RISCV32-NEXT: sw s1, 8(sp) +; RISCV32-NEXT: sw s8, 28(sp) +; RISCV32-NEXT: addi a0, sp, 40 +; RISCV32-NEXT: addi a1, sp, 24 +; RISCV32-NEXT: addi a2, sp, 8 +; RISCV32-NEXT: sw s0, 24(sp) ; RISCV32-NEXT: call __multi3 -; RISCV32-NEXT: lw a0, 12(s0) -; RISCV32-NEXT: lw a1, 8(s0) -; RISCV32-NEXT: mul a2, s3, a1 -; RISCV32-NEXT: mul a3, a0, s5 -; RISCV32-NEXT: add a4, a3, a2 -; RISCV32-NEXT: lw a2, 12(s1) -; RISCV32-NEXT: lw a3, 8(s1) -; RISCV32-NEXT: mul a5, s4, a3 -; RISCV32-NEXT: mul s1, a2, s6 -; RISCV32-NEXT: add a5, s1, a5 -; RISCV32-NEXT: mul s1, a3, s6 -; RISCV32-NEXT: mul s0, a1, s5 -; RISCV32-NEXT: add s1, s0, s1 -; RISCV32-NEXT: sltu s0, s1, s0 -; RISCV32-NEXT: mulhu a6, a3, s6 -; RISCV32-NEXT: add t1, a6, a5 -; RISCV32-NEXT: mulhu t2, a1, s5 -; RISCV32-NEXT: add t3, t2, a4 -; RISCV32-NEXT: add a5, t3, t1 -; RISCV32-NEXT: add a5, a5, s0 -; RISCV32-NEXT: lw s0, 44(sp) -; RISCV32-NEXT: add a5, s0, a5 -; RISCV32-NEXT: lw a4, 40(sp) -; RISCV32-NEXT: add a7, a4, s1 -; RISCV32-NEXT: sltu t0, a7, a4 -; RISCV32-NEXT: add a5, a5, t0 -; RISCV32-NEXT: beq a5, s0, .LBB0_2 +; RISCV32-NEXT: mul a0, s8, s7 +; RISCV32-NEXT: mul a1, s3, s0 +; RISCV32-NEXT: add a0, a1, a0 +; RISCV32-NEXT: mulhu a5, s7, s0 +; RISCV32-NEXT: add a0, a5, a0 +; RISCV32-NEXT: mul a1, s5, s6 +; RISCV32-NEXT: mul a2, s2, s1 +; RISCV32-NEXT: add a1, a2, a1 +; RISCV32-NEXT: mulhu t0, s6, s1 +; RISCV32-NEXT: add t1, t0, a1 +; RISCV32-NEXT: add a6, t1, a0 +; RISCV32-NEXT: mul a1, s7, s0 +; RISCV32-NEXT: mul a3, s6, s1 +; RISCV32-NEXT: add a4, a3, a1 +; RISCV32-NEXT: lw a1, 52(sp) +; RISCV32-NEXT: lw a2, 48(sp) +; RISCV32-NEXT: sltu a3, a4, a3 +; RISCV32-NEXT: add a3, a6, a3 +; RISCV32-NEXT: add a3, a1, a3 +; RISCV32-NEXT: add a6, a2, a4 +; RISCV32-NEXT: sltu a2, a6, a2 +; RISCV32-NEXT: add a7, a3, a2 +; RISCV32-NEXT: beq a7, a1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start -; RISCV32-NEXT: sltu t0, a5, s0 +; RISCV32-NEXT: sltu a2, a7, a1 ; RISCV32-NEXT: .LBB0_2: # %start -; RISCV32-NEXT: snez a4, s3 -; RISCV32-NEXT: snez s1, a0 -; RISCV32-NEXT: and a4, s1, a4 -; RISCV32-NEXT: snez s1, s4 -; RISCV32-NEXT: snez s0, a2 -; RISCV32-NEXT: and s1, s0, s1 -; RISCV32-NEXT: mulhu s0, a2, s6 -; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or s1, s1, s0 -; RISCV32-NEXT: mulhu s0, a0, s5 -; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or a4, a4, s0 -; RISCV32-NEXT: sltu t2, t3, t2 -; RISCV32-NEXT: mulhu s0, s3, a1 -; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or t3, a4, s0 -; RISCV32-NEXT: sltu s0, t1, a6 -; RISCV32-NEXT: mulhu a4, s4, a3 +; RISCV32-NEXT: sltu a0, a0, a5 +; RISCV32-NEXT: snez a1, s8 +; RISCV32-NEXT: snez a3, s3 +; RISCV32-NEXT: and a1, a3, a1 +; RISCV32-NEXT: mulhu a3, s3, s0 +; RISCV32-NEXT: snez a3, a3 +; RISCV32-NEXT: or a1, a1, a3 +; RISCV32-NEXT: mulhu a3, s8, s7 +; RISCV32-NEXT: snez a3, a3 +; RISCV32-NEXT: or a1, a1, a3 +; RISCV32-NEXT: or a0, a1, a0 +; RISCV32-NEXT: sltu a1, t1, t0 +; RISCV32-NEXT: snez a3, s5 +; RISCV32-NEXT: snez a4, s2 +; RISCV32-NEXT: and a3, a4, a3 +; RISCV32-NEXT: mulhu a4, s2, s1 +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: or a3, a3, a4 +; RISCV32-NEXT: mulhu a4, s5, s6 +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: or a3, a3, a4 +; RISCV32-NEXT: or a1, a3, a1 +; RISCV32-NEXT: or a3, s7, s3 +; RISCV32-NEXT: snez a3, a3 +; RISCV32-NEXT: or a4, s6, s2 ; RISCV32-NEXT: snez a4, a4 -; RISCV32-NEXT: or a4, s1, a4 -; RISCV32-NEXT: lw s1, 36(sp) -; RISCV32-NEXT: sw s1, 4(s2) -; RISCV32-NEXT: lw s1, 32(sp) -; RISCV32-NEXT: sw s1, 0(s2) -; RISCV32-NEXT: sw a7, 8(s2) -; RISCV32-NEXT: sw a5, 12(s2) -; RISCV32-NEXT: or a4, a4, s0 -; RISCV32-NEXT: or a5, t3, t2 +; RISCV32-NEXT: and a3, a4, a3 +; RISCV32-NEXT: or a1, a3, a1 ; RISCV32-NEXT: or a0, a1, a0 -; RISCV32-NEXT: or a1, a3, a2 -; RISCV32-NEXT: snez a1, a1 -; RISCV32-NEXT: snez a0, a0 -; RISCV32-NEXT: and a0, a0, a1 -; RISCV32-NEXT: or a0, a0, a5 -; RISCV32-NEXT: or a0, a0, a4 -; RISCV32-NEXT: or a0, a0, t0 +; RISCV32-NEXT: lw a1, 44(sp) +; RISCV32-NEXT: lw a3, 40(sp) +; RISCV32-NEXT: or a0, a0, a2 ; RISCV32-NEXT: andi a0, a0, 1 -; RISCV32-NEXT: sb a0, 16(s2) -; RISCV32-NEXT: lw s6, 48(sp) -; RISCV32-NEXT: lw s5, 52(sp) -; RISCV32-NEXT: lw s4, 56(sp) -; RISCV32-NEXT: lw s3, 60(sp) -; RISCV32-NEXT: lw s2, 64(sp) -; RISCV32-NEXT: lw s1, 68(sp) -; RISCV32-NEXT: lw s0, 72(sp) -; RISCV32-NEXT: lw ra, 76(sp) -; RISCV32-NEXT: addi sp, sp, 80 +; RISCV32-NEXT: sw a1, 4(s4) +; RISCV32-NEXT: sw a3, 0(s4) +; RISCV32-NEXT: sw a6, 8(s4) +; RISCV32-NEXT: sw a7, 12(s4) +; RISCV32-NEXT: sb a0, 16(s4) +; RISCV32-NEXT: lw s8, 56(sp) +; RISCV32-NEXT: lw s7, 60(sp) +; RISCV32-NEXT: lw s6, 64(sp) +; RISCV32-NEXT: lw s5, 68(sp) +; RISCV32-NEXT: lw s4, 72(sp) +; RISCV32-NEXT: lw s3, 76(sp) +; RISCV32-NEXT: lw s2, 80(sp) +; RISCV32-NEXT: lw s1, 84(sp) +; RISCV32-NEXT: lw s0, 88(sp) +; RISCV32-NEXT: lw ra, 92(sp) +; RISCV32-NEXT: addi sp, sp, 96 ; RISCV32-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -46,9 +46,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 ; ILP32-ILP32F-FPELIM-NEXT: ret ; @@ -65,9 +65,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi sp, sp, 48 @@ -83,9 +83,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; @@ -150,9 +150,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 ; ILP32-ILP32F-FPELIM-NEXT: ret ; @@ -169,9 +169,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi sp, sp, 48 @@ -187,9 +187,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; @@ -203,10 +203,10 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 24 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -223,10 +223,10 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 @@ -256,9 +256,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, s0, 8 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, -16(s0) -; ILP32-ILP32F-FPELIM-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 15 ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -16 ; ILP32-ILP32F-FPELIM-NEXT: sub a0, sp, a0 @@ -286,9 +286,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 15 ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -16 ; ILP32-ILP32F-WITHFP-NEXT: sub a0, sp, a0 @@ -316,9 +316,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 8(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 4(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, s0, 8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, -16(s0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 4(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a1, 15 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sub a0, sp, a0 @@ -346,17 +346,17 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, s0, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, -32(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, zero, 1 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 33 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -16 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 32 -; LP64-LP64F-LP64D-FPELIM-NEXT: srli a1, a1, 32 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a1, a0 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd s1, 8(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: srli a0, a0, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 15 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, zero, 1 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 33 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, -16 +; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a0, a1 ; LP64-LP64F-LP64D-FPELIM-NEXT: sub a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: call notdead @@ -382,17 +382,17 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, zero, 1 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 33 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -16 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: srli a1, a1, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a1, a0 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: srli a0, a0, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, zero, 1 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 33 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, -16 +; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a0, a1 ; LP64-LP64F-LP64D-WITHFP-NEXT: sub a0, sp, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv sp, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: call notdead @@ -419,9 +419,9 @@ ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -16 ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: mv a2, zero ; ILP32-ILP32F-FPELIM-NEXT: lui a3, 261888 ; ILP32-ILP32F-FPELIM-NEXT: addi a4, zero, 2 +; ILP32-ILP32F-FPELIM-NEXT: mv a2, zero ; ILP32-ILP32F-FPELIM-NEXT: call va1 ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 16 @@ -433,9 +433,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 -; ILP32-ILP32F-WITHFP-NEXT: mv a2, zero ; ILP32-ILP32F-WITHFP-NEXT: lui a3, 261888 ; ILP32-ILP32F-WITHFP-NEXT: addi a4, zero, 2 +; ILP32-ILP32F-WITHFP-NEXT: mv a2, zero ; ILP32-ILP32F-WITHFP-NEXT: call va1 ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) @@ -446,9 +446,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a2, zero ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a3, 261888 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a4, zero, 2 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a2, zero ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 @@ -498,10 +498,10 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) -; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 35 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 ; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8 +; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 35 +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a1) ; ILP32-ILP32F-FPELIM-NEXT: ori a1, a1, 4 ; ILP32-ILP32F-FPELIM-NEXT: lw a1, 0(a1) @@ -521,10 +521,10 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) -; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 19 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8 +; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 19 +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a1) ; ILP32-ILP32F-WITHFP-NEXT: ori a1, a1, 4 ; ILP32-ILP32F-WITHFP-NEXT: lw a1, 0(a1) @@ -543,10 +543,10 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a1, a0, -8 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a1) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ori a1, a1, 4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a1, 0(a1) @@ -556,8 +556,6 @@ ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 24 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) @@ -565,6 +563,8 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 24 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a0, 32 @@ -585,8 +585,6 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) @@ -594,6 +592,8 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a0, 32 @@ -705,10 +705,10 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 24 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -725,10 +725,10 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 @@ -747,8 +747,8 @@ ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -16 ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: mv a2, zero ; ILP32-ILP32F-FPELIM-NEXT: lui a3, 261888 +; ILP32-ILP32F-FPELIM-NEXT: mv a2, zero ; ILP32-ILP32F-FPELIM-NEXT: call va2 ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 16 @@ -760,8 +760,8 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 -; ILP32-ILP32F-WITHFP-NEXT: mv a2, zero ; ILP32-ILP32F-WITHFP-NEXT: lui a3, 261888 +; ILP32-ILP32F-WITHFP-NEXT: mv a2, zero ; ILP32-ILP32F-WITHFP-NEXT: call va2 ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) @@ -772,8 +772,8 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a2, zero ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a3, 261888 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a2, zero ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va2 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 @@ -819,16 +819,16 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19 ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-FPELIM-NEXT: ori a3, a0, 4 -; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a3 -; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 +; ILP32-ILP32F-FPELIM-NEXT: addi a3, sp, 27 +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 4(sp) +; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a0) +; ILP32-ILP32F-FPELIM-NEXT: ori a0, a0, 4 +; ILP32-ILP32F-FPELIM-NEXT: lw a4, 0(a0) +; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a3 ; ILP32-ILP32F-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a4 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a2, a1 ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 32 ; ILP32-ILP32F-FPELIM-NEXT: ret @@ -844,16 +844,16 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0) -; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 19 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-WITHFP-NEXT: ori a3, a0, 4 -; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a3 -; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 +; ILP32-ILP32F-WITHFP-NEXT: addi a3, s0, 19 +; ILP32-ILP32F-WITHFP-NEXT: sw a3, -12(s0) +; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a0) +; ILP32-ILP32F-WITHFP-NEXT: ori a0, a0, 4 +; ILP32-ILP32F-WITHFP-NEXT: lw a4, 0(a0) +; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a3 ; ILP32-ILP32F-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a4 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a2, a1 ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 16(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 20(sp) @@ -868,16 +868,16 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 19 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ori a3, a0, 4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 0(a3) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a3 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, sp, 27 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 4(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 0(a0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ori a0, a0, 4 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a4, 0(a0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret @@ -885,15 +885,15 @@ ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 16 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a0, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: srli a2, a2, 32 @@ -914,15 +914,15 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, s0 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, s0 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a0, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: srli a2, a2, 32 @@ -973,9 +973,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: addi a4, a3, 4 ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a3 ; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 ; ILP32-ILP32F-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a3 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a2, a1 ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 32 ; ILP32-ILP32F-FPELIM-NEXT: ret @@ -999,9 +999,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: addi a4, a3, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a4, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a3 ; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 ; ILP32-ILP32F-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a3 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a2, a1 ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 16(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 20(sp) @@ -1023,9 +1023,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fld ft0, 0(a0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fsd ft0, 8(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 8(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 @@ -1039,11 +1039,11 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 16 -; LP64-LP64F-LP64D-FPELIM-NEXT: ori a0, a0, 8 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: ori a3, a0, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -1058,11 +1058,11 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, s0 -; LP64-LP64F-LP64D-WITHFP-NEXT: ori a0, a0, 8 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, s0 +; LP64-LP64F-LP64D-WITHFP-NEXT: ori a3, a0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 @@ -1084,9 +1084,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, zero, 2 ; ILP32-ILP32F-FPELIM-NEXT: addi a1, zero, 1111 +; ILP32-ILP32F-FPELIM-NEXT: lui a5, 262144 ; ILP32-ILP32F-FPELIM-NEXT: mv a2, zero ; ILP32-ILP32F-FPELIM-NEXT: mv a4, zero -; ILP32-ILP32F-FPELIM-NEXT: lui a5, 262144 ; ILP32-ILP32F-FPELIM-NEXT: call va3 ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 16 @@ -1100,9 +1100,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, zero, 2 ; ILP32-ILP32F-WITHFP-NEXT: addi a1, zero, 1111 +; ILP32-ILP32F-WITHFP-NEXT: lui a5, 262144 ; ILP32-ILP32F-WITHFP-NEXT: mv a2, zero ; ILP32-ILP32F-WITHFP-NEXT: mv a4, zero -; ILP32-ILP32F-WITHFP-NEXT: lui a5, 262144 ; ILP32-ILP32F-WITHFP-NEXT: call va3 ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) @@ -1115,9 +1115,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, zero, 2 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, zero, 1111 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 262144 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a2, zero ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a4, zero -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 262144 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va3 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 @@ -1190,9 +1190,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -4 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 4(sp) +; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) ; ILP32-ILP32F-FPELIM-NEXT: add a1, a1, s0 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a1, a2 -; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) ; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 ; ILP32-ILP32F-FPELIM-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 12(sp) @@ -1233,9 +1233,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -4 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a3, -16(s0) +; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) ; ILP32-ILP32F-WITHFP-NEXT: add a1, a1, s1 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a1, a2 -; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) ; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 ; ILP32-ILP32F-WITHFP-NEXT: lw s1, 20(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 24(sp) @@ -1275,9 +1275,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, a0, 4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 4(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a1, s0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a1, a2 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw s0, 8(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 12(sp) @@ -1317,9 +1317,9 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -4 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 0(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, s0 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, a2 -; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 0(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: addw a0, a1, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: ld s0, 16(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 24(sp) @@ -1361,9 +1361,9 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -4 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, 0(a0) ; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, s1 ; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, a2 -; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, 0(a0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addw a0, a1, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) @@ -1425,8 +1425,7 @@ ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -328 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 335544 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 1311 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 688509 ; ILP32-ILP32F-FPELIM-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 32 @@ -1435,6 +1434,7 @@ ; ILP32-ILP32F-FPELIM-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-FPELIM-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-FPELIM-NEXT: addi a7, zero, 4 +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64 @@ -1470,8 +1470,7 @@ ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -328 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -28(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 335544 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 1311 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 688509 ; ILP32-ILP32F-WITHFP-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -32 @@ -1480,6 +1479,7 @@ ; ILP32-ILP32F-WITHFP-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-WITHFP-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-WITHFP-NEXT: addi a7, zero, 4 +; ILP32-ILP32F-WITHFP-NEXT: sw a5, -32(s0) ; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) @@ -1514,8 +1514,7 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -328 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 335544 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 1311 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 1311 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 688509 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a0, -2048 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 32 @@ -1524,6 +1523,7 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, zero, 12 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a4, zero, 13 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a7, zero, 4 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 @@ -1546,33 +1546,33 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 655 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 1475 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi t0, a0, 1475 +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1192 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 381 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a6, a0, -2048 ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1048248 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 1311 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1147 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 512 -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 73 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, -1311 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 1147 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 14 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 983 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a2, 1192 -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a2, a2, 381 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a2, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a6, a2, -2048 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a0, 1311 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a1, -1967 +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 512 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 73 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1311 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 1147 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, -1967 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, zero, 1 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, zero, 11 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a4, zero, 12 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a5, zero, 13 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a7, zero, 14 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 0(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 48 @@ -1597,33 +1597,33 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 655 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 1475 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi t0, a0, 1475 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1192 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 381 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a6, a0, -2048 ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1048248 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 1311 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1147 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 512 -; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a1, a1, 73 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, -1311 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 1147 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 14 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 983 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a2, 1192 -; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a2, a2, 381 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a2, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a6, a2, -2048 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a0, 1311 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a1, -1967 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 512 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 73 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1311 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 1147 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, -1967 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, zero, 1 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, zero, 11 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a4, zero, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a5, zero, 13 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a7, zero, 14 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 0(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) @@ -1650,9 +1650,9 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 20 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 ; ILP32-ILP32F-FPELIM-NEXT: ret ; @@ -1669,9 +1669,9 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi sp, sp, 48 @@ -1687,9 +1687,9 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 20 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; @@ -1703,10 +1703,10 @@ ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 16 ; LP64-LP64F-LP64D-FPELIM-NEXT: ori a1, a1, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -1723,10 +1723,10 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: mv a1, s0 ; LP64-LP64F-LP64D-WITHFP-NEXT: ori a1, a1, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll --- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll +++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll @@ -45,10 +45,10 @@ ; RV32I-LABEL: test_zext_i16: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a0, %hi(shorts) -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi a1, a1, -120 -; RV32I-NEXT: lhu a2, %lo(shorts)(a0) -; RV32I-NEXT: bne a2, a1, .LBB1_3 +; RV32I-NEXT: lhu a1, %lo(shorts)(a0) +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -120 +; RV32I-NEXT: bne a1, a2, .LBB1_3 ; RV32I-NEXT: # %bb.1: # %entry ; RV32I-NEXT: addi a0, a0, %lo(shorts) ; RV32I-NEXT: lhu a0, 2(a0)