Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -418,6 +418,13 @@
     /// i1 then the high bits must conform to getBooleanContents.
     SELECT,
 
+    /// Like SELECT, but with the true and false values already expanded into
+    /// low and high parts, so that one node selects both halves. Especially
+    /// useful for targets that don't have a conditional move instruction.
+    /// The condition (op #0) follows the same rules as the one of SELECT.
+    /// [Lo, Hi] = SELECT_PARTS(Cond, TrueLo, TrueHi, FalseLo, FalseHi)
+    SELECT_PARTS,
+
     /// Select with a vector condition (op #0) and two vector operands (ops #1
     /// and #2), returning a vector result. All vectors have the same length.
     /// Much like the scalar select and setcc, each bit in the condition selects
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1511,7 +1511,6 @@
                        "operator!");
 
   case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
-  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
 
@@ -1521,6 +1520,7 @@
   case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
 
+  case ISD::SELECT:      ExpandIntRes_SELECT(N, Lo, Hi); break;
   case ISD::ANY_EXTEND:  ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
   case ISD::AssertSext:  ExpandIntRes_AssertSext(N, Lo, Hi); break;
   case ISD::AssertZext:  ExpandIntRes_AssertZext(N, Lo, Hi); break;
@@ -1911,6 +1911,39 @@
   }
 }
 
+void DAGTypeLegalizer::ExpandIntRes_SELECT(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+  // If the target has a custom lowering for SELECT_PARTS (which should
+  // generate better code than two SELECTs) then we emit that. Otherwise
+  // we just split the SELECT.
+  if (TLI.getOperationAction(ISD::SELECT_PARTS, NVT) !=
+      TargetLowering::Custom) {
+    SplitRes_SELECT(N, Lo, Hi);
+    return;
+  }
+
+  // The condition of the new node must conform to getBooleanContents, just
+  // like the condition of a SELECT. GetPromotedInteger leaves the high bits
+  // unspecified, so a condition that needs promoting is extended with
+  // PromoteTargetBoolean instead; a condition of legal type is used as is.
+  SDValue Cond = N->getOperand(0);
+  if (getTypeAction(Cond.getValueType()) == TargetLowering::TypePromoteInteger)
+    Cond = PromoteTargetBoolean(Cond, NVT);
+
+  SDValue TrueL, TrueH;
+  GetExpandedInteger(N->getOperand(1), TrueL, TrueH);
+  SDValue FalseL, FalseH;
+  GetExpandedInteger(N->getOperand(2), FalseL, FalseH);
+
+  SDLoc DL(N);
+  SDValue Ops[] = { Cond, TrueL, TrueH, FalseL, FalseH };
+  Lo = DAG.getNode(ISD::SELECT_PARTS, DL, DAG.getVTList(NVT, NVT), Ops);
+  Hi = Lo.getValue(1);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
                                            SDValue &Lo, SDValue &Hi) {
   SDLoc DL(N);
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -398,6 +398,7 @@
 
   // Integer Result Expansion.
   void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+  void ExpandIntRes_SELECT            (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ANY_EXTEND        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_AssertSext        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_AssertZext        (SDNode *N, SDValue &Lo, SDValue &Hi);
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -270,6 +270,7 @@
   case ISD::SETCC:                      return "setcc";
   case ISD::SETCCCARRY:                 return "setcccarry";
   case ISD::SELECT:                     return "select";
+  case ISD::SELECT_PARTS:               return "select_parts";
   case ISD::VSELECT:                    return "vselect";
   case ISD::SELECT_CC:                  return "select_cc";
   case ISD::INSERT_VECTOR_ELT:          return "insert_vector_elt";
Index: lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.h
+++ lib/Target/RISCV/RISCVISelLowering.h
@@ -29,6 +29,7 @@
   MRET_FLAG,
   CALL,
   SELECT_CC,
+  SELECT_CC_PARTS,
   BuildPairF64,
   SplitF64,
   TAIL,
@@ -137,10 +138,14 @@
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSELECT_PARTS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
 
+  void optimizeSelectCC(SDValue Op, SDValue &LHS, SDValue &RHS,
+                        SDValue &CC, SelectionDAG &DAG) const;
+
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVector<CCValAssign, 16> &ArgLocs) const;
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -68,6 +68,8 @@
   setOperationAction(ISD::SELECT, XLenVT, Custom);
   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
 
+  setOperationAction(ISD::SELECT_PARTS, XLenVT, Custom);
+
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
@@ -335,6 +337,8 @@
     return lowerConstantPool(Op, DAG);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
+  case ISD::SELECT_PARTS:
+    return lowerSELECT_PARTS(Op, DAG);
   case ISD::VASTART:
     return lowerVASTART(Op, DAG);
   case ISD::FRAMEADDR:
@@ -430,39 +434,42 @@
   SDLoc DL(Op);
   MVT XLenVT = Subtarget.getXLenVT();
 
-  // If the result type is XLenVT and CondV is the output of a SETCC node
-  // which also operated on XLenVT inputs, then merge the SETCC node into the
-  // lowered RISCVISD::SELECT_CC to take advantage of the integer
-  // compare+branch instructions. i.e.:
-  // (select (setcc lhs, rhs, cc), truev, falsev)
-  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
-  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
-      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
-    SDValue LHS = CondV.getOperand(0);
-    SDValue RHS = CondV.getOperand(1);
-    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
-    ISD::CondCode CCVal = CC->get();
-
-    normaliseSetCC(LHS, RHS, CCVal);
-
-    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
-    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
-    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
-    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
-  }
-
-  // Otherwise:
+  // Lower as follows, unless optimizeSelectCC is able to optimize
   // (select condv, truev, falsev)
   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
-
+  optimizeSelectCC(Op, CondV, Zero, SetNE, DAG);
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
 
   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
 }
 
+SDValue RISCVTargetLowering::lowerSELECT_PARTS(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDValue CondV = Op.getOperand(0);
+  SDValue TrueVLo = Op.getOperand(1);
+  SDValue TrueVHi = Op.getOperand(2);
+  SDValue FalseVLo = Op.getOperand(3);
+  SDValue FalseVHi = Op.getOperand(4);
+
+  SDLoc DL(Op);
+  MVT XLenVT = Subtarget.getXLenVT();
+  EVT VT = Op.getValueType();
+  SDVTList VTs = DAG.getVTList(VT, VT, MVT::Glue);
+
+  // Lower as follows, unless optimizeSelectCC is able to optimize
+  // (select_parts condv, tvlo, tvhi, fvlo, fvhi)
+  // -> (riscvisd::select_cc_parts condv, zero, setne, tvlo, tvhi, fvlo, fvhi)
+  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
+  optimizeSelectCC(Op, CondV, Zero, SetNE, DAG);
+  SDValue Ops[] = {CondV, Zero, SetNE, TrueVLo, TrueVHi, FalseVLo, FalseVHi};
+
+  return DAG.getNode(RISCVISD::SELECT_CC_PARTS, DL, VTs, Ops);
+}
+
 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
@@ -784,10 +791,13 @@
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
-  switch (MI.getOpcode()) {
+  unsigned Op = MI.getOpcode();
+
+  switch (Op) {
   default:
     llvm_unreachable("Unexpected instr type to insert");
   case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_GPR_Parts_using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
   case RISCV::Select_FPR64_Using_CC_GPR:
     break;
@@ -797,6 +807,12 @@
     return emitSplitF64Pseudo(MI, BB);
   }
 
+  // Select_GPR_Parts_using_CC_GPR defines two results (lo and hi); every other
+  // select pseudo defines one. The inputs (lhs, rhs, cc, true values, false
+  // values) follow the results.
+  const unsigned NumResults =
+      Op == RISCV::Select_GPR_Parts_using_CC_GPR ? 2 : 1;
+
   // To "insert" a SELECT instruction, we actually have to insert the triangle
   // control-flow pattern. The incoming instruction knows the destination vreg
   // to set, the condition code register to branch on, the true/false values to
@@ -830,10 +846,11 @@
   HeadMBB->addSuccessor(IfFalseMBB);
   HeadMBB->addSuccessor(TailMBB);
 
-  // Insert appropriate branch.
-  unsigned LHS = MI.getOperand(1).getReg();
-  unsigned RHS = MI.getOperand(2).getReg();
-  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+  // Insert appropriate branch. The first input operand comes right after the
+  // results, so its index equals NumResults.
+  unsigned LHS = MI.getOperand(NumResults).getReg();
+  unsigned RHS = MI.getOperand(NumResults + 1).getReg();
+  auto CC = static_cast<ISD::CondCode>(MI.getOperand(NumResults + 2).getImm());
   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
 
   BuildMI(HeadMBB, DL, TII.get(Opcode))
@@ -844,13 +861,19 @@
   // IfFalseMBB just falls through to TailMBB.
   IfFalseMBB->addSuccessor(TailMBB);
 
-  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
-  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
-          MI.getOperand(0).getReg())
-      .addReg(MI.getOperand(4).getReg())
-      .addMBB(HeadMBB)
-      .addReg(MI.getOperand(5).getReg())
-      .addMBB(IfFalseMBB);
+  // Emit one PHI per result (lo and hi for Select_GPR_Parts_using_CC_GPR):
+  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
+  // The true values follow lhs, rhs and cc; the false values follow the true
+  // values.
+  const unsigned FirstTrueV = NumResults + 3;
+  const unsigned FirstFalseV = FirstTrueV + NumResults;
+  for (unsigned I = 0; I != NumResults; ++I)
+    BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
+            MI.getOperand(I).getReg())
+        .addReg(MI.getOperand(FirstTrueV + I).getReg())
+        .addMBB(HeadMBB)
+        .addReg(MI.getOperand(FirstFalseV + I).getReg())
+        .addMBB(IfFalseMBB);
 
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return TailMBB;
@@ -1367,6 +1390,33 @@
   return Chain;
 }
 
+void RISCVTargetLowering::optimizeSelectCC(SDValue Op, SDValue &LHS,
+                                           SDValue &RHS, SDValue &CC,
+                                           SelectionDAG &DAG) const {
+  // If the result type is XLenVT and CondV is the output of a SETCC node which
+  // also operated on XLenVT inputs, then merge the SETCC node into the lowered
+  // node to take advantage of the integer compare+branch instructions. i.e.:
+  // (select[_parts] (setcc lhs, rhs, cc), ...)
+  // -> (riscvisd::select_cc[_parts] lhs, rhs, cc, ...)
+
+  SDValue CondV = Op.getOperand(0);
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  if (Op.getSimpleValueType() != XLenVT || CondV.getOpcode() != ISD::SETCC ||
+      CondV.getOperand(0).getSimpleValueType() != XLenVT)
+    return;
+
+  LHS = CondV.getOperand(0);
+  RHS = CondV.getOperand(1);
+  auto OptCC = cast<CondCodeSDNode>(CondV.getOperand(2));
+  ISD::CondCode CCVal = OptCC->get();
+
+  normaliseSetCC(LHS, RHS, CCVal);
+
+  SDLoc DL(Op);
+  CC = DAG.getConstant(CCVal, DL, XLenVT);
+}
+
 /// isEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization.
 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
@@ -1812,6 +1862,8 @@
     return "RISCVISD::CALL";
   case RISCVISD::SELECT_CC:
     return "RISCVISD::SELECT_CC";
+  case RISCVISD::SELECT_CC_PARTS:
+    return "RISCVISD::SELECT_CC_PARTS";
   case RISCVISD::BuildPairF64:
     return "RISCVISD::BuildPairF64";
   case RISCVISD::SplitF64:
Index: lib/Target/RISCV/RISCVInstrInfo.td
===================================================================
--- lib/Target/RISCV/RISCVInstrInfo.td
+++ lib/Target/RISCV/RISCVInstrInfo.td
@@ -21,10 +21,16 @@
                                      SDTCisVT<1, i32>]>;
 
 // Target-dependent type requirements.
-def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>; -def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>, - SDTCisSameAs<0, 4>, - SDTCisSameAs<4, 5>]>; +def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>; +def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>, + SDTCisSameAs<0, 4>, + SDTCisSameAs<4, 5>]>; +def SDT_RISCVSelectCCParts : SDTypeProfile<2, 7, [SDTCisSameAs<0, 1>, + SDTCisSameAs<2, 3>, + SDTCisSameAs<5, 6>, + SDTCisSameAs<7, 8>, + SDTCisSameAs<0, 5>, + SDTCisSameAs<5, 7>]>; // Target-independent nodes, but with target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, @@ -46,6 +52,8 @@ [SDNPHasChain, SDNPOptInGlue]>; def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC, [SDNPInGlue]>; +def riscv_selectcc_parts : SDNode<"RISCVISD::SELECT_CC_PARTS", + SDT_RISCVSelectCCParts, [SDNPInGlue]>; def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -790,6 +798,14 @@ def Select_GPR_Using_CC_GPR : SelectCC_rrirr; +let usesCustomInserter = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in +def Select_GPR_Parts_using_CC_GPR + : Pseudo<(outs GPR:$dstlo, GPR:$dsthi), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$imm, + GPR:$truevlo, GPR:$truevhi, GPR:$falsevlo, GPR:$falsevhi), + [(set GPR:$dstlo, GPR:$dsthi, (riscv_selectcc_parts GPR:$lhs, GPR:$rhs, + (XLenVT imm:$imm), GPR:$truevlo, GPR:$truevhi, GPR:$falsevlo, GPR:$falsevhi))]>; + /// Branches and jumps // Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch Index: test/CodeGen/RISCV/atomic-rmw.ll =================================================================== --- test/CodeGen/RISCV/atomic-rmw.ll +++ test/CodeGen/RISCV/atomic-rmw.ll @@ -14566,44 +14566,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB200_1: 
# %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB200_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB200_4 -; RV32I-NEXT: j .LBB200_5 +; RV32I-NEXT: slt a4, s1, a1 +; RV32I-NEXT: j .LBB200_4 ; RV32I-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB200_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB200_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB200_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB200_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB200_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB200_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB200_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB200_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB200_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -14624,44 +14620,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB200_1: # 
%atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB200_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB200_4 -; RV32IA-NEXT: j .LBB200_5 +; RV32IA-NEXT: slt a4, s1, a1 +; RV32IA-NEXT: j .LBB200_4 ; RV32IA-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB200_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB200_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB200_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB200_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB200_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB200_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB200_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB200_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB200_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -14727,44 +14719,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, 
sp ; RV32I-NEXT: .LBB201_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB201_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB201_4 -; RV32I-NEXT: j .LBB201_5 +; RV32I-NEXT: slt a4, s1, a1 +; RV32I-NEXT: j .LBB201_4 ; RV32I-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB201_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB201_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB201_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB201_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB201_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB201_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB201_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB201_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB201_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -14785,44 +14773,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; 
RV32IA-NEXT: .LBB201_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB201_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB201_4 -; RV32IA-NEXT: j .LBB201_5 +; RV32IA-NEXT: slt a4, s1, a1 +; RV32IA-NEXT: j .LBB201_4 ; RV32IA-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB201_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB201_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB201_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB201_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB201_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB201_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB201_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB201_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB201_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -14888,44 +14872,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; 
RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB202_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB202_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB202_4 -; RV32I-NEXT: j .LBB202_5 +; RV32I-NEXT: slt a4, s1, a1 +; RV32I-NEXT: j .LBB202_4 ; RV32I-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB202_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB202_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB202_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB202_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB202_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB202_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB202_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB202_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB202_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -14946,44 +14926,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: 
lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB202_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB202_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB202_4 -; RV32IA-NEXT: j .LBB202_5 +; RV32IA-NEXT: slt a4, s1, a1 +; RV32IA-NEXT: j .LBB202_4 ; RV32IA-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB202_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB202_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB202_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB202_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB202_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB202_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB202_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB202_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB202_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -15049,44 +15025,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; 
RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB203_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB203_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB203_4 -; RV32I-NEXT: j .LBB203_5 +; RV32I-NEXT: slt a4, s1, a1 +; RV32I-NEXT: j .LBB203_4 ; RV32I-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB203_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB203_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB203_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB203_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB203_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB203_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB203_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB203_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB203_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -15107,44 +15079,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; 
RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB203_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB203_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB203_4 -; RV32IA-NEXT: j .LBB203_5 +; RV32IA-NEXT: slt a4, s1, a1 +; RV32IA-NEXT: j .LBB203_4 ; RV32IA-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB203_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB203_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB203_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB203_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB203_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB203_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB203_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB203_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB203_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -15210,44 +15178,40 @@ ; RV32I-NEXT: mv s2, a1 ; 
RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB204_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB204_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB204_4 -; RV32I-NEXT: j .LBB204_5 +; RV32I-NEXT: slt a4, s1, a1 +; RV32I-NEXT: j .LBB204_4 ; RV32I-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB204_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB204_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB204_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB204_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB204_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB204_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB204_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB204_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB204_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -15268,44 +15232,40 @@ ; RV32IA-NEXT: mv s2, a1 ; 
RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB204_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB204_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB204_4 -; RV32IA-NEXT: j .LBB204_5 +; RV32IA-NEXT: slt a4, s1, a1 +; RV32IA-NEXT: j .LBB204_4 ; RV32IA-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB204_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB204_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB204_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB204_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB204_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB204_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB204_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB204_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB204_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ 
-15371,45 +15331,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB205_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB205_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 +; RV32I-NEXT: slt a2, s1, a1 ; RV32I-NEXT: j .LBB205_4 ; RV32I-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB205_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB205_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB205_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB205_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB205_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB205_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB205_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB205_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -15430,45 +15386,41 @@ 
; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB205_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB205_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 +; RV32IA-NEXT: slt a2, s1, a1 ; RV32IA-NEXT: j .LBB205_4 ; RV32IA-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB205_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB205_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB205_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB205_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB205_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB205_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB205_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB205_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw 
s2, 20(sp) @@ -15534,45 +15486,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB206_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB206_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 +; RV32I-NEXT: slt a2, s1, a1 ; RV32I-NEXT: j .LBB206_4 ; RV32I-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB206_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB206_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB206_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB206_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB206_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB206_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB206_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB206_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ 
-15593,45 +15541,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB206_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB206_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 +; RV32IA-NEXT: slt a2, s1, a1 ; RV32IA-NEXT: j .LBB206_4 ; RV32IA-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB206_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB206_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB206_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB206_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB206_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB206_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB206_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB206_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: 
lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -15697,45 +15641,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB207_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB207_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 +; RV32I-NEXT: slt a2, s1, a1 ; RV32I-NEXT: j .LBB207_4 ; RV32I-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB207_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB207_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB207_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB207_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB207_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB207_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB207_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB207_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; 
RV32I-NEXT: lw s2, 20(sp) @@ -15756,45 +15696,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB207_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB207_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 +; RV32IA-NEXT: slt a2, s1, a1 ; RV32IA-NEXT: j .LBB207_4 ; RV32IA-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB207_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB207_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB207_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB207_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB207_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB207_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB207_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB207_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 
12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -15860,45 +15796,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB208_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB208_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 +; RV32I-NEXT: slt a2, s1, a1 ; RV32I-NEXT: j .LBB208_4 ; RV32I-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB208_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB208_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB208_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB208_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB208_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB208_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB208_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB208_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; 
RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -15919,45 +15851,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB208_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB208_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 +; RV32IA-NEXT: slt a2, s1, a1 ; RV32IA-NEXT: j .LBB208_4 ; RV32IA-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB208_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB208_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB208_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB208_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB208_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB208_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB208_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB208_1 +; RV32IA-NEXT: # %bb.7: # 
%atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16023,45 +15951,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB209_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB209_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: slt a0, s1, a1 +; RV32I-NEXT: slt a2, s1, a1 ; RV32I-NEXT: j .LBB209_4 ; RV32I-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB209_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB209_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB209_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB209_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB209_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB209_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB209_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB209_1 +; RV32I-NEXT: # %bb.7: # 
%atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16082,45 +16006,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB209_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB209_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: slt a0, s1, a1 +; RV32IA-NEXT: slt a2, s1, a1 ; RV32IA-NEXT: j .LBB209_4 ; RV32IA-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB209_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB209_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB209_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB209_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB209_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB209_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB209_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz 
a2, .LBB209_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16186,44 +16106,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB210_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB210_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB210_4 -; RV32I-NEXT: j .LBB210_5 +; RV32I-NEXT: sltu a4, s1, a1 +; RV32I-NEXT: j .LBB210_4 ; RV32I-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB210_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB210_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB210_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB210_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB210_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB210_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB210_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB210_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB210_1 
+; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16244,44 +16160,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB210_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB210_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB210_4 -; RV32IA-NEXT: j .LBB210_5 +; RV32IA-NEXT: sltu a4, s1, a1 +; RV32IA-NEXT: j .LBB210_4 ; RV32IA-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB210_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB210_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB210_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB210_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB210_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB210_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB210_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB210_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) 
+; RV32IA-NEXT: beqz a2, .LBB210_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16347,44 +16259,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB211_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB211_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB211_4 -; RV32I-NEXT: j .LBB211_5 +; RV32I-NEXT: sltu a4, s1, a1 +; RV32I-NEXT: j .LBB211_4 ; RV32I-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB211_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB211_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB211_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB211_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB211_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB211_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB211_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB211_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; 
RV32I-NEXT: beqz a2, .LBB211_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16405,44 +16313,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB211_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB211_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB211_4 -; RV32IA-NEXT: j .LBB211_5 +; RV32IA-NEXT: sltu a4, s1, a1 +; RV32IA-NEXT: j .LBB211_4 ; RV32IA-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB211_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB211_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB211_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB211_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB211_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB211_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB211_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB211_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; 
RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB211_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16508,44 +16412,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB212_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB212_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB212_4 -; RV32I-NEXT: j .LBB212_5 +; RV32I-NEXT: sltu a4, s1, a1 +; RV32I-NEXT: j .LBB212_4 ; RV32I-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB212_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB212_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB212_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB212_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB212_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB212_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB212_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB212_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; 
RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB212_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16566,44 +16466,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB212_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB212_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB212_4 -; RV32IA-NEXT: j .LBB212_5 +; RV32IA-NEXT: sltu a4, s1, a1 +; RV32IA-NEXT: j .LBB212_4 ; RV32IA-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB212_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB212_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB212_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB212_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB212_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB212_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB212_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 3 ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB212_1 -; 
RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB212_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16669,44 +16565,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB213_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB213_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB213_4 -; RV32I-NEXT: j .LBB213_5 +; RV32I-NEXT: sltu a4, s1, a1 +; RV32I-NEXT: j .LBB213_4 ; RV32I-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB213_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB213_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB213_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB213_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB213_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB213_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB213_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB213_1 -; 
RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB213_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16727,44 +16619,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB213_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB213_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB213_4 -; RV32IA-NEXT: j .LBB213_5 +; RV32IA-NEXT: sltu a4, s1, a1 +; RV32IA-NEXT: j .LBB213_4 ; RV32IA-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB213_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB213_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB213_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB213_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB213_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB213_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB213_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 
0(sp) -; RV32IA-NEXT: beqz a0, .LBB213_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB213_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16830,44 +16718,40 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB214_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB214_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB214_4 -; RV32I-NEXT: j .LBB214_5 +; RV32I-NEXT: sltu a4, s1, a1 +; RV32I-NEXT: j .LBB214_4 ; RV32I-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB214_5 +; RV32I-NEXT: sltu a4, s2, a0 ; RV32I-NEXT: .LBB214_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB214_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB214_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB214_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB214_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB214_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 
0(sp) -; RV32I-NEXT: beqz a0, .LBB214_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB214_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -16888,44 +16772,40 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB214_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB214_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB214_4 -; RV32IA-NEXT: j .LBB214_5 +; RV32IA-NEXT: sltu a4, s1, a1 +; RV32IA-NEXT: j .LBB214_4 ; RV32IA-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB214_5 +; RV32IA-NEXT: sltu a4, s2, a0 ; RV32IA-NEXT: .LBB214_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB214_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB214_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB214_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB214_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB214_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; 
RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB214_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB214_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -16991,45 +16871,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB215_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB215_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 +; RV32I-NEXT: sltu a2, s1, a1 ; RV32I-NEXT: j .LBB215_4 ; RV32I-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB215_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB215_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB215_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB215_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB215_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB215_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; 
RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB215_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB215_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -17050,45 +16926,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB215_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB215_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 +; RV32IA-NEXT: sltu a2, s1, a1 ; RV32IA-NEXT: j .LBB215_4 ; RV32IA-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB215_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB215_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB215_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB215_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB215_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB215_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: mv a4, zero ; RV32IA-NEXT: mv a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; 
RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB215_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB215_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -17154,45 +17026,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB216_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB216_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 +; RV32I-NEXT: sltu a2, s1, a1 ; RV32I-NEXT: j .LBB216_4 ; RV32I-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB216_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB216_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB216_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB216_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB216_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB216_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: addi a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; 
RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB216_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB216_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -17213,45 +17081,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB216_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB216_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 +; RV32IA-NEXT: sltu a2, s1, a1 ; RV32IA-NEXT: j .LBB216_4 ; RV32IA-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB216_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB216_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB216_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB216_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB216_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB216_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 2 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: 
call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB216_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB216_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -17317,45 +17181,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB217_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB217_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 +; RV32I-NEXT: sltu a2, s1, a1 ; RV32I-NEXT: j .LBB217_4 ; RV32I-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB217_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB217_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB217_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB217_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB217_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB217_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 3 ; RV32I-NEXT: mv a5, zero ; RV32I-NEXT: call 
__atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB217_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB217_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -17376,45 +17236,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB217_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB217_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 +; RV32IA-NEXT: sltu a2, s1, a1 ; RV32IA-NEXT: j .LBB217_4 ; RV32IA-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB217_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB217_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB217_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB217_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB217_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB217_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 3 ; RV32IA-NEXT: mv 
a5, zero ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB217_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB217_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -17480,45 +17336,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB218_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB218_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 +; RV32I-NEXT: sltu a2, s1, a1 ; RV32I-NEXT: j .LBB218_4 ; RV32I-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB218_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB218_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB218_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB218_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB218_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB218_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: addi a4, zero, 4 ; RV32I-NEXT: addi 
a5, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB218_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB218_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -17539,45 +17391,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB218_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB218_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 +; RV32IA-NEXT: sltu a2, s1, a1 ; RV32IA-NEXT: j .LBB218_4 ; RV32IA-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB218_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB218_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB218_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB218_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB218_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB218_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: 
addi a4, zero, 4 ; RV32IA-NEXT: addi a5, zero, 2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB218_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB218_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) @@ -17643,45 +17491,41 @@ ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: mv s4, sp ; RV32I-NEXT: .LBB219_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: beq a1, s1, .LBB219_3 ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s1, a1 +; RV32I-NEXT: sltu a2, s1, a1 ; RV32I-NEXT: j .LBB219_4 ; RV32I-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: sltu a2, s2, a0 ; RV32I-NEXT: .LBB219_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB219_6 +; RV32I-NEXT: xori a4, a2, 1 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a4, .LBB219_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB219_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB219_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: mv a3, s1 -; RV32I-NEXT: .LBB219_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 ; 
RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: addi a5, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB219_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a2, .LBB219_1 +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: lw s4, 12(sp) ; RV32I-NEXT: lw s3, 16(sp) ; RV32I-NEXT: lw s2, 20(sp) @@ -17702,45 +17546,41 @@ ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: mv s3, a0 ; RV32IA-NEXT: lw a1, 4(a0) -; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: mv s4, sp ; RV32IA-NEXT: .LBB219_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: beq a1, s1, .LBB219_3 ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a1 +; RV32IA-NEXT: sltu a2, s1, a1 ; RV32IA-NEXT: j .LBB219_4 ; RV32IA-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: sltu a2, s2, a0 ; RV32IA-NEXT: .LBB219_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB219_6 +; RV32IA-NEXT: xori a4, a2, 1 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: bnez a4, .LBB219_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB219_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB219_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: mv a3, s1 -; RV32IA-NEXT: .LBB219_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32IA-NEXT: sw a0, 0(sp) ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv 
a0, s3 ; RV32IA-NEXT: mv a1, s4 ; RV32IA-NEXT: addi a4, zero, 5 ; RV32IA-NEXT: addi a5, zero, 5 ; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: mv a2, a0 ; RV32IA-NEXT: lw a1, 4(sp) -; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB219_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: lw a0, 0(sp) +; RV32IA-NEXT: beqz a2, .LBB219_1 +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: lw s4, 12(sp) ; RV32IA-NEXT: lw s3, 16(sp) ; RV32IA-NEXT: lw s2, 20(sp) Index: test/CodeGen/RISCV/select-parts.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/select-parts.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +; Check that selects of wide values don't introduce unnecessary control flow. +; We should get ISD::SELECT -> ISD::SELECT_PARTS -> RISCVISD::SELECT_CC_PARTS +; -> Select_GPR_Parts_using_CC_GPR pseudo instruction -> single branch per two +; parts. 
+ +define i64 @cmovcc64(i32 signext %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmovcc64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a5, zero, 123 +; RV32I-NEXT: beq a0, a5, .LBB0_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB0_2: # %entry +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovcc64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a3, zero, 123 +; RV64I-NEXT: beq a0, a3, .LBB0_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: .LBB0_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 123 + %cond = select i1 %cmp, i64 %b, i64 %c + ret i64 %cond +} + +define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { +; RV32I-LABEL: cmovcc128: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: xori a1, a1, 123 +; RV32I-NEXT: or a5, a1, a2 +; RV32I-NEXT: beqz a5, .LBB1_3 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: lw a1, 4(a4) +; RV32I-NEXT: lw a2, 0(a4) +; RV32I-NEXT: bnez a5, .LBB1_4 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: lw a5, 12(a3) +; RV32I-NEXT: lw a3, 8(a3) +; RV32I-NEXT: j .LBB1_5 +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: lw a1, 4(a3) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: beqz a5, .LBB1_2 +; RV32I-NEXT: .LBB1_4: # %entry +; RV32I-NEXT: lw a5, 12(a4) +; RV32I-NEXT: lw a3, 8(a4) +; RV32I-NEXT: .LBB1_5: # %entry +; RV32I-NEXT: sw a5, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovcc128: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a5, zero, 123 +; RV64I-NEXT: beq a0, a5, .LBB1_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a2, a4 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: .LBB1_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: ret +entry: + %cmp = icmp eq i64 %a, 123 + %cond = select i1 %cmp, i128 %b, i128 %c + ret i128 %cond +} + +define i64 @cmov64(i1 %a, i64 %b, i64 
%c) nounwind { +; RV32I-LABEL: cmov64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bnez a0, .LBB2_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB2_2: # %entry +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmov64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB2_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: .LBB2_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +entry: + %cond = select i1 %a, i64 %b, i64 %c + ret i64 %cond +} + +define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { +; RV32I-LABEL: cmov128: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bnez a1, .LBB3_3 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: lw a4, 4(a3) +; RV32I-NEXT: lw a5, 0(a3) +; RV32I-NEXT: beqz a1, .LBB3_4 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: lw a1, 12(a2) +; RV32I-NEXT: lw a2, 8(a2) +; RV32I-NEXT: j .LBB3_5 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: lw a4, 4(a2) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: bnez a1, .LBB3_2 +; RV32I-NEXT: .LBB3_4: # %entry +; RV32I-NEXT: lw a1, 12(a3) +; RV32I-NEXT: lw a2, 8(a3) +; RV32I-NEXT: .LBB3_5: # %entry +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a5, 0(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmov128: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: bnez a0, .LBB3_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a2, a4 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: .LBB3_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: ret +entry: + %cond = select i1 %a, i128 %b, i128 %c + ret i128 %cond +}