diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1579,8 +1579,7 @@ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) .addReg(DesiredReg, RegState::Kill); - if (!IsThumb) - MIB.addImm(0); + MIB.addImm(0); // A1/T2 rotate operand MIB.add(predOps(ARMCC::AL)); } @@ -2782,14 +2781,14 @@ case ARM::CMP_SWAP_8: if (STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, - ARM::tUXTB, NextMBBI); + ARM::t2UXTB, NextMBBI); else return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, NextMBBI); case ARM::CMP_SWAP_16: if (STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, - ARM::tUXTH, NextMBBI); + ARM::t2UXTH, NextMBBI); else return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, NextMBBI); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -19120,6 +19120,14 @@ if (AI->isFloatingPointOperation()) return AtomicExpansionKind::CmpXChg; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -17,66 +17,131 @@ define i8 @test_xchg_i8() { ; CHECK-ARM8-LABEL: test_xchg_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB0_1 -; CHECK-ARM8-NEXT: .LBB0_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB0_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB0_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB0_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB0_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB0_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB0_2 +; CHECK-ARM8-NEXT: .LBB0_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB0_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: mov r1, #1 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB0_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB0_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xchg_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB0_1 -; CHECK-ARM6-NEXT: .LBB0_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI0_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB0_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r3, .LCPI0_1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB0_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB0_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB0_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB0_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB0_2 +; CHECK-ARM6-NEXT: .LBB0_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB0_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI0_0 -; CHECK-ARM6-NEXT: mov r1, #1 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB0_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB0_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI0_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI0_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_xchg_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB0_1 ; CHECK-THUMB2-NEXT: .LBB0_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB0_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB0_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB0_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB0_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB0_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB0_2 +; CHECK-THUMB2-NEXT: .LBB0_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB0_1 Depth=1 ; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB0_1 -; CHECK-THUMB2-NEXT: b .LBB0_2 -; CHECK-THUMB2-NEXT: .LBB0_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB0_5 +; CHECK-THUMB2-NEXT: .LBB0_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xchg_i8: @@ -97,70 +162,131 @@ define i8 @test_add_i8() { ; CHECK-ARM8-LABEL: test_add_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB1_1 -; CHECK-ARM8-NEXT: .LBB1_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB1_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: add r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB1_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB1_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB1_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB1_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB1_2 +; CHECK-ARM8-NEXT: .LBB1_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: add r0, r0, r1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB1_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB1_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_add_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB1_1 -; CHECK-ARM6-NEXT: .LBB1_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI1_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB1_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: add r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI1_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB1_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB1_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB1_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB1_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB1_2 +; CHECK-ARM6-NEXT: .LBB1_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: add r0, r0, #1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI1_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB1_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB1_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI1_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI1_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_add_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB1_1 ; CHECK-THUMB2-NEXT: .LBB1_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: adds r1, r0, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB1_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: add.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB1_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB1_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB1_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB1_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB1_2 +; CHECK-THUMB2-NEXT: .LBB1_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB1_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB1_1 -; CHECK-THUMB2-NEXT: b .LBB1_2 -; CHECK-THUMB2-NEXT: .LBB1_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB1_5 +; CHECK-THUMB2-NEXT: .LBB1_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_add_i8: @@ -181,70 +307,131 @@ define i8 @test_sub_i8() { ; CHECK-ARM8-LABEL: test_sub_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB2_1 -; CHECK-ARM8-NEXT: .LBB2_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB2_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sub r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB2_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB2_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB2_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB2_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB2_2 +; CHECK-ARM8-NEXT: .LBB2_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB2_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: sub r0, r0, r1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB2_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB2_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_sub_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB2_1 -; CHECK-ARM6-NEXT: .LBB2_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI2_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB2_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sub r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI2_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB2_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB2_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB2_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB2_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB2_2 +; CHECK-ARM6-NEXT: .LBB2_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB2_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: sub r0, r0, #1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI2_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB2_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB2_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI2_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI2_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_sub_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB2_1 ; CHECK-THUMB2-NEXT: .LBB2_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: subs r1, r0, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB2_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sub.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB2_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB2_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB2_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB2_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB2_2 +; CHECK-THUMB2-NEXT: .LBB2_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB2_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB2_1 -; CHECK-THUMB2-NEXT: b .LBB2_2 -; CHECK-THUMB2-NEXT: .LBB2_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB2_5 +; CHECK-THUMB2-NEXT: .LBB2_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_sub_i8: @@ -265,69 +452,131 @@ define i8 @test_and_i8() { ; CHECK-ARM8-LABEL: test_and_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB3_1 -; CHECK-ARM8-NEXT: .LBB3_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB3_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: and r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB3_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB3_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB3_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB3_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB3_2 +; CHECK-ARM8-NEXT: .LBB3_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB3_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r0, r0, #1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB3_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB3_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_and_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB3_1 -; CHECK-ARM6-NEXT: .LBB3_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI3_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB3_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: and r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI3_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB3_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB3_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB3_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB3_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB3_2 +; CHECK-ARM6-NEXT: .LBB3_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB3_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r0, r0, #1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI3_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB3_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB3_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI3_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI3_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_and_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB3_1 ; CHECK-THUMB2-NEXT: .LBB3_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: and r1, r0, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB3_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: and r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB3_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB3_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB3_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB3_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB3_2 +; CHECK-THUMB2-NEXT: .LBB3_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB3_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB3_1 -; CHECK-THUMB2-NEXT: b .LBB3_2 -; CHECK-THUMB2-NEXT: .LBB3_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB3_5 +; CHECK-THUMB2-NEXT: .LBB3_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_and_i8: @@ -348,74 +597,136 @@ define i8 @test_nand_i8() { ; CHECK-ARM8-LABEL: test_nand_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB4_1 -; CHECK-ARM8-NEXT: .LBB4_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB4_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mvn r0, r1 +; CHECK-ARM8-NEXT: mvn r2, #1 +; CHECK-ARM8-NEXT: orr r12, r0, r2 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB4_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB4_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB4_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB4_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB4_2 +; CHECK-ARM8-NEXT: .LBB4_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB4_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mvn r0, r0 -; CHECK-ARM8-NEXT: mvn r1, #1 -; CHECK-ARM8-NEXT: orr r0, r0, r1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB4_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB4_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_nand_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB4_1 -; CHECK-ARM6-NEXT: .LBB4_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI4_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB4_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mvn r0, r1 +; CHECK-ARM6-NEXT: mvn r2, #1 +; CHECK-ARM6-NEXT: orr r12, r0, r2 +; CHECK-ARM6-NEXT: ldr r3, .LCPI4_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB4_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB4_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB4_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB4_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB4_2 +; CHECK-ARM6-NEXT: .LBB4_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB4_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mvn r0, r0 -; CHECK-ARM6-NEXT: mvn r1, #1 -; CHECK-ARM6-NEXT: orr r0, r0, r1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI4_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB4_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB4_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI4_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI4_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_nand_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB4_1 ; CHECK-THUMB2-NEXT: .LBB4_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r1, [r2] -; CHECK-THUMB2-NEXT: mov r0, r1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB4_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mvn r0, #1 +; CHECK-THUMB2-NEXT: orn r12, r0, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB4_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB4_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB4_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB4_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB4_2 +; CHECK-THUMB2-NEXT: .LBB4_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB4_1 Depth=1 ; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r0, #254 -; CHECK-THUMB2-NEXT: orn r1, r0, r1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB4_1 -; CHECK-THUMB2-NEXT: b .LBB4_2 -; CHECK-THUMB2-NEXT: .LBB4_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB4_5 +; CHECK-THUMB2-NEXT: .LBB4_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_nand_i8: @@ -436,70 +747,131 @@ define i8 @test_or_i8() { ; CHECK-ARM8-LABEL: test_or_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB5_1 -; CHECK-ARM8-NEXT: .LBB5_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB5_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: orr r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB5_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB5_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB5_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB5_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB5_2 +; CHECK-ARM8-NEXT: .LBB5_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB5_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: orr r0, r0, r1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB5_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB5_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_or_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB5_1 -; CHECK-ARM6-NEXT: .LBB5_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI5_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB5_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: orr r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI5_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB5_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB5_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB5_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB5_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB5_2 +; CHECK-ARM6-NEXT: .LBB5_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB5_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: orr r0, r0, #1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI5_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB5_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB5_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI5_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI5_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_or_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB5_1 ; CHECK-THUMB2-NEXT: .LBB5_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: orr r1, r0, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB5_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: orr r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB5_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB5_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB5_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB5_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB5_2 +; CHECK-THUMB2-NEXT: .LBB5_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB5_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB5_1 -; CHECK-THUMB2-NEXT: b .LBB5_2 -; CHECK-THUMB2-NEXT: .LBB5_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB5_5 +; CHECK-THUMB2-NEXT: .LBB5_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_or_i8: @@ -520,69 +892,131 @@ define i8 @test_xor_i8() { ; CHECK-ARM8-LABEL: test_xor_i8: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB6_1 -; CHECK-ARM8-NEXT: .LBB6_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r0, [r0] +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB6_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB6_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: eor r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB6_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB6_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB6_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB6_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB6_2 +; CHECK-ARM8-NEXT: .LBB6_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB6_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: eor r0, r0, #1 -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB6_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB6_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xor_i8: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB6_1 -; CHECK-ARM6-NEXT: .LBB6_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI6_0 -; CHECK-ARM6-NEXT: ldrexb r0, [r0] +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB6_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB6_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: eor r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI6_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB6_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB6_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB6_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB6_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB6_2 +; CHECK-ARM6-NEXT: .LBB6_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB6_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: eor r0, r0, #1 -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI6_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB6_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB6_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI6_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI6_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_xor_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB6_1 ; CHECK-THUMB2-NEXT: .LBB6_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: eor r1, r0, #1 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB6_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: eor r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB6_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB6_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB6_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB6_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB6_2 +; CHECK-THUMB2-NEXT: .LBB6_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB6_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB6_1 -; CHECK-THUMB2-NEXT: b .LBB6_2 -; CHECK-THUMB2-NEXT: .LBB6_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB6_5 +; CHECK-THUMB2-NEXT: .LBB6_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xor_i8: @@ -604,26 +1038,43 @@ ; CHECK-ARM8-LABEL: test_max_i8: ; CHECK-ARM8: @ %bb.0: @ %entry ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB7_1 -; CHECK-ARM8-NEXT: .LBB7_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r1, [r0] -; CHECK-ARM8-NEXT: sxtb r2, r1 -; CHECK-ARM8-NEXT: mov r0, r2 +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB7_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sxtb r0, r1 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r0, #1 +; CHECK-ARM8-NEXT: movgt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB7_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB7_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB7_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB7_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB7_2 +; CHECK-ARM8-NEXT: .LBB7_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB7_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 -; CHECK-ARM8-NEXT: cmp r2, #1 -; CHECK-ARM8-NEXT: movgt r0, r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB7_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB7_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr @@ -631,55 +1082,96 @@ ; CHECK-ARM6-LABEL: test_max_i8: ; CHECK-ARM6: @ %bb.0: @ %entry ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB7_1 -; CHECK-ARM6-NEXT: .LBB7_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI7_0 -; CHECK-ARM6-NEXT: ldrexb r1, [r0] -; CHECK-ARM6-NEXT: sxtb r2, r1 -; CHECK-ARM6-NEXT: mov r0, r2 +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB7_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sxtb r0, r1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r0, #1 +; CHECK-ARM6-NEXT: movgt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI7_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB7_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB7_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB7_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB7_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB7_2 +; CHECK-ARM6-NEXT: .LBB7_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB7_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 -; CHECK-ARM6-NEXT: cmp r2, #1 -; CHECK-ARM6-NEXT: movgt r0, r1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI7_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB7_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB7_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI7_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI7_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_max_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB7_1 ; CHECK-THUMB2-NEXT: .LBB7_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: sxtb r3, r0 -; CHECK-THUMB2-NEXT: mov r1, r3 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r3, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB7_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sxtb r0, r1 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r0, #1 ; CHECK-THUMB2-NEXT: it gt -; CHECK-THUMB2-NEXT: movgt r1, r0 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movgt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB7_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB7_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB7_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB7_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB7_2 +; CHECK-THUMB2-NEXT: .LBB7_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB7_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB7_1 -; CHECK-THUMB2-NEXT: b .LBB7_2 -; CHECK-THUMB2-NEXT: .LBB7_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB7_5 +; CHECK-THUMB2-NEXT: .LBB7_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_max_i8: @@ -701,26 +1193,43 @@ ; CHECK-ARM8-LABEL: test_min_i8: ; CHECK-ARM8: @ %bb.0: @ %entry ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB8_1 -; CHECK-ARM8-NEXT: .LBB8_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r1, [r0] -; CHECK-ARM8-NEXT: sxtb r2, r1 -; CHECK-ARM8-NEXT: mov r0, r2 +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB8_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sxtb r0, r1 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r0, #2 +; CHECK-ARM8-NEXT: movlt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: .LBB8_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB8_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB8_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB8_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB8_2 +; CHECK-ARM8-NEXT: .LBB8_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB8_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 -; CHECK-ARM8-NEXT: cmp r2, #2 -; CHECK-ARM8-NEXT: movlt r0, r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB8_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxtb r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB8_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr @@ -728,55 +1237,96 @@ ; CHECK-ARM6-LABEL: test_min_i8: ; CHECK-ARM6: @ %bb.0: @ %entry ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB8_1 -; CHECK-ARM6-NEXT: .LBB8_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI8_0 -; CHECK-ARM6-NEXT: ldrexb r1, [r0] -; CHECK-ARM6-NEXT: sxtb r2, r1 -; CHECK-ARM6-NEXT: mov r0, r2 +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB8_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sxtb r0, r1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r0, #2 +; CHECK-ARM6-NEXT: movlt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI8_1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: .LBB8_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB8_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB8_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB8_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB8_2 +; CHECK-ARM6-NEXT: .LBB8_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB8_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 -; CHECK-ARM6-NEXT: cmp r2, #2 -; CHECK-ARM6-NEXT: movlt r0, r1 +; CHECK-ARM6-NEXT: uxtb r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI8_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB8_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB8_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI8_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI8_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_min_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB8_1 ; CHECK-THUMB2-NEXT: .LBB8_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: sxtb r3, r0 -; CHECK-THUMB2-NEXT: mov r1, r3 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r3, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB8_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sxtb r0, r1 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r0, #2 ; CHECK-THUMB2-NEXT: it lt -; CHECK-THUMB2-NEXT: movlt r1, r0 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: .LBB8_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB8_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB8_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB8_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB8_2 +; CHECK-THUMB2-NEXT: .LBB8_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB8_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxtb r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB8_1 -; CHECK-THUMB2-NEXT: b .LBB8_2 -; CHECK-THUMB2-NEXT: .LBB8_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB8_5 +; CHECK-THUMB2-NEXT: .LBB8_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_min_i8: @@ -797,79 +1347,144 @@ define i8 @test_umax_i8() { ; CHECK-ARM8-LABEL: test_umax_i8: ; CHECK-ARM8: @ %bb.0: @ %entry +; CHECK-ARM8-NEXT: push {r11, lr} +; CHECK-ARM8-NEXT: mov r11, sp ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB9_1 -; CHECK-ARM8-NEXT: .LBB9_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB9_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: uxtb r1, r12 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movhi r0, r1 +; CHECK-ARM8-NEXT: movhi lr, r12 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r12, r12 +; CHECK-ARM8-NEXT: .LBB9_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB9_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r12 +; CHECK-ARM8-NEXT: bne .LBB9_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB9_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, lr, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB9_2 +; CHECK-ARM8-NEXT: .LBB9_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB9_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB9_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB9_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: mov sp, r11 +; CHECK-ARM8-NEXT: pop {r11, pc} ; ; CHECK-ARM6-LABEL: test_umax_i8: ; CHECK-ARM6: @ %bb.0: @ %entry +; CHECK-ARM6-NEXT: push {r11, lr} +; CHECK-ARM6-NEXT: mov r11, sp ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB9_1 -; CHECK-ARM6-NEXT: .LBB9_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI9_0 -; CHECK-ARM6-NEXT: ldrexb r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB9_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: uxtb r1, r12 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: movhi r0, r1 +; CHECK-ARM6-NEXT: movhi lr, r12 +; CHECK-ARM6-NEXT: ldr r3, .LCPI9_1 +; CHECK-ARM6-NEXT: uxtb r12, r12 +; CHECK-ARM6-NEXT: .LBB9_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB9_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r12 +; CHECK-ARM6-NEXT: bne .LBB9_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB9_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, lr, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB9_2 +; CHECK-ARM6-NEXT: .LBB9_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB9_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI9_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB9_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB9_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: mov sp, r11 +; CHECK-ARM6-NEXT: pop {r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI9_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI9_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_umax_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: push {r7, lr} +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB9_1 ; CHECK-THUMB2-NEXT: .LBB9_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB9_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr.w r12, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: uxtb.w r1, r12 +; CHECK-THUMB2-NEXT: mov.w lr, #1 +; CHECK-THUMB2-NEXT: cmp r1, #1 ; CHECK-THUMB2-NEXT: it hi -; CHECK-THUMB2-NEXT: movhi r1, r0 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movhi lr, r12 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb.w r12, r12 +; CHECK-THUMB2-NEXT: .LBB9_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB9_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r12 +; CHECK-THUMB2-NEXT: bne .LBB9_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB9_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, lr, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB9_2 +; CHECK-THUMB2-NEXT: .LBB9_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB9_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB9_1 -; CHECK-THUMB2-NEXT: b .LBB9_2 -; CHECK-THUMB2-NEXT: .LBB9_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB9_5 +; CHECK-THUMB2-NEXT: .LBB9_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #8 +; CHECK-THUMB2-NEXT: pop {r7, pc} ; ; CHECK-THUMB1-LABEL: test_umax_i8: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -889,79 +1504,144 @@ define i8 @test_umin_i8() { ; CHECK-ARM8-LABEL: test_umin_i8: ; CHECK-ARM8: @ %bb.0: @ %entry +; CHECK-ARM8-NEXT: push {r11, lr} +; CHECK-ARM8-NEXT: mov r11, sp ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB10_1 -; CHECK-ARM8-NEXT: .LBB10_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: ldrexb r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 +; CHECK-ARM8-NEXT: ldrb r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB10_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: uxtb r1, r12 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 -; CHECK-ARM8-NEXT: movlo r0, r1 +; CHECK-ARM8-NEXT: movlo lr, r12 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-ARM8-NEXT: uxtb r12, r12 +; CHECK-ARM8-NEXT: .LBB10_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB10_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexb r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r12 +; CHECK-ARM8-NEXT: bne .LBB10_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB10_2 Depth=2 +; CHECK-ARM8-NEXT: strexb r2, lr, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB10_2 +; CHECK-ARM8-NEXT: .LBB10_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB10_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r1, r0, #255 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-ARM8-NEXT: strexb r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB10_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB10_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: mov sp, r11 +; CHECK-ARM8-NEXT: pop {r11, pc} ; ; CHECK-ARM6-LABEL: test_umin_i8: ; CHECK-ARM6: @ %bb.0: @ %entry +; CHECK-ARM6-NEXT: push {r11, lr} +; CHECK-ARM6-NEXT: mov r11, sp ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB10_1 -; CHECK-ARM6-NEXT: .LBB10_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI10_0 -; CHECK-ARM6-NEXT: ldrexb r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 +; CHECK-ARM6-NEXT: ldrb r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB10_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: uxtb r1, r12 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 -; CHECK-ARM6-NEXT: movlo r0, r1 +; CHECK-ARM6-NEXT: movlo lr, r12 +; CHECK-ARM6-NEXT: ldr r3, .LCPI10_1 +; CHECK-ARM6-NEXT: uxtb r12, r12 +; CHECK-ARM6-NEXT: .LBB10_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB10_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexb r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r12 +; CHECK-ARM6-NEXT: bne .LBB10_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB10_2 Depth=2 +; CHECK-ARM6-NEXT: strexb r2, lr, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB10_2 +; CHECK-ARM6-NEXT: .LBB10_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB10_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r1, r0, #255 -; CHECK-ARM6-NEXT: ldr r2, .LCPI10_0 -; CHECK-ARM6-NEXT: strexb r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB10_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB10_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: mov sp, r11 +; CHECK-ARM6-NEXT: pop {r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI10_0: ; CHECK-ARM6-NEXT: .long atomic_i8 +; CHECK-ARM6-NEXT: .LCPI10_1: +; CHECK-ARM6-NEXT: .long atomic_i8 ; ; CHECK-THUMB2-LABEL: test_umin_i8: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: push {r7, lr} +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: ldrb r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB10_1 ; CHECK-THUMB2-NEXT: .LBB10_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i8 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i8 -; CHECK-THUMB2-NEXT: ldrexb r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB10_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr.w r12, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: uxtb.w r1, r12 +; CHECK-THUMB2-NEXT: mov.w lr, #1 +; CHECK-THUMB2-NEXT: cmp r1, #2 ; CHECK-THUMB2-NEXT: it lo -; CHECK-THUMB2-NEXT: movlo r1, r0 -; CHECK-THUMB2-NEXT: strexb r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlo lr, r12 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i8 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i8 +; CHECK-THUMB2-NEXT: uxtb.w r12, r12 +; CHECK-THUMB2-NEXT: .LBB10_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB10_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexb r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r12 +; CHECK-THUMB2-NEXT: bne .LBB10_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB10_2 Depth=2 +; CHECK-THUMB2-NEXT: strexb r2, lr, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB10_2 +; CHECK-THUMB2-NEXT: .LBB10_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB10_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB10_1 -; CHECK-THUMB2-NEXT: b .LBB10_2 -; CHECK-THUMB2-NEXT: .LBB10_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB10_5 +; CHECK-THUMB2-NEXT: .LBB10_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #8 +; CHECK-THUMB2-NEXT: pop {r7, pc} ; ; CHECK-THUMB1-LABEL: test_umin_i8: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -983,66 +1663,131 @@ define i16 @test_xchg_i16() { ; CHECK-ARM8-LABEL: test_xchg_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB11_1 -; CHECK-ARM8-NEXT: .LBB11_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB11_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB11_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB11_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB11_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB11_2 +; CHECK-ARM8-NEXT: .LBB11_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB11_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: mov r1, #1 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB11_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB11_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xchg_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB11_1 -; CHECK-ARM6-NEXT: .LBB11_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI11_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] -; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI11_0 -; CHECK-ARM6-NEXT: mov r1, #1 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB11_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB11_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r3, .LCPI11_1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB11_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB11_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB11_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB11_2 +; CHECK-ARM6-NEXT: .LBB11_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB11_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB11_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI11_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI11_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_xchg_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB11_1 ; CHECK-THUMB2-NEXT: .LBB11_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB11_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB11_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB11_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB11_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB11_2 +; CHECK-THUMB2-NEXT: .LBB11_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB11_1 Depth=1 ; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB11_1 -; CHECK-THUMB2-NEXT: b .LBB11_2 -; CHECK-THUMB2-NEXT: .LBB11_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB11_5 +; CHECK-THUMB2-NEXT: .LBB11_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xchg_i16: @@ -1063,70 +1808,131 @@ define i16 @test_add_i16() { ; CHECK-ARM8-LABEL: test_add_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB12_1 -; CHECK-ARM8-NEXT: .LBB12_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB12_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: add r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB12_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB12_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB12_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB12_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB12_2 +; CHECK-ARM8-NEXT: .LBB12_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB12_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: add r0, r0, r1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB12_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB12_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_add_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB12_1 -; CHECK-ARM6-NEXT: .LBB12_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI12_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB12_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: add r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI12_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB12_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB12_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB12_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB12_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB12_2 +; CHECK-ARM6-NEXT: .LBB12_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB12_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: add r0, r0, #1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI12_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB12_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB12_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI12_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI12_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_add_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB12_1 ; CHECK-THUMB2-NEXT: .LBB12_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: adds r1, r0, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB12_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: add.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB12_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB12_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB12_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB12_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB12_2 +; CHECK-THUMB2-NEXT: .LBB12_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB12_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB12_1 -; CHECK-THUMB2-NEXT: b .LBB12_2 -; CHECK-THUMB2-NEXT: .LBB12_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB12_5 +; CHECK-THUMB2-NEXT: .LBB12_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_add_i16: @@ -1147,70 +1953,131 @@ define i16 @test_sub_i16() { ; CHECK-ARM8-LABEL: test_sub_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB13_1 -; CHECK-ARM8-NEXT: .LBB13_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB13_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB13_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sub r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB13_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB13_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB13_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB13_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB13_2 +; CHECK-ARM8-NEXT: .LBB13_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB13_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: sub r0, r0, r1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB13_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB13_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_sub_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB13_1 -; CHECK-ARM6-NEXT: .LBB13_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI13_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB13_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB13_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sub r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI13_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB13_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB13_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB13_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB13_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB13_2 +; CHECK-ARM6-NEXT: .LBB13_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB13_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: sub r0, r0, #1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI13_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB13_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB13_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI13_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI13_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_sub_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB13_1 ; CHECK-THUMB2-NEXT: .LBB13_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: subs r1, r0, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB13_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sub.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB13_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB13_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB13_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB13_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB13_2 +; CHECK-THUMB2-NEXT: .LBB13_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB13_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB13_1 -; CHECK-THUMB2-NEXT: b .LBB13_2 -; CHECK-THUMB2-NEXT: .LBB13_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB13_5 +; CHECK-THUMB2-NEXT: .LBB13_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_sub_i16: @@ -1231,69 +2098,131 @@ define i16 @test_and_i16() { ; CHECK-ARM8-LABEL: test_and_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB14_1 -; CHECK-ARM8-NEXT: .LBB14_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB14_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB14_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: and r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB14_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB14_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB14_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB14_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB14_2 +; CHECK-ARM8-NEXT: .LBB14_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB14_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r0, r0, #1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB14_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB14_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_and_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB14_1 -; CHECK-ARM6-NEXT: .LBB14_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI14_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB14_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB14_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: and r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI14_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB14_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB14_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB14_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB14_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB14_2 +; CHECK-ARM6-NEXT: .LBB14_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB14_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r0, r0, #1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI14_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB14_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB14_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI14_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI14_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_and_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB14_1 ; CHECK-THUMB2-NEXT: .LBB14_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: and r1, r0, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB14_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: and r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB14_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB14_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB14_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB14_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB14_2 +; CHECK-THUMB2-NEXT: .LBB14_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB14_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB14_1 -; CHECK-THUMB2-NEXT: b .LBB14_2 -; CHECK-THUMB2-NEXT: .LBB14_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB14_5 +; CHECK-THUMB2-NEXT: .LBB14_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_and_i16: @@ -1314,74 +2243,136 @@ define i16 @test_nand_i16() { ; CHECK-ARM8-LABEL: test_nand_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB15_1 -; CHECK-ARM8-NEXT: .LBB15_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB15_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB15_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mvn r0, r1 +; CHECK-ARM8-NEXT: mvn r2, #1 +; CHECK-ARM8-NEXT: orr r12, r0, r2 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB15_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB15_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB15_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB15_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB15_2 +; CHECK-ARM8-NEXT: .LBB15_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB15_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mvn r0, r0 -; CHECK-ARM8-NEXT: mvn r1, #1 -; CHECK-ARM8-NEXT: orr r0, r0, r1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB15_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB15_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_nand_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB15_1 -; CHECK-ARM6-NEXT: .LBB15_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI15_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB15_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB15_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mvn r0, r1 +; CHECK-ARM6-NEXT: mvn r2, #1 +; CHECK-ARM6-NEXT: orr r12, r0, r2 +; CHECK-ARM6-NEXT: ldr r3, .LCPI15_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB15_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB15_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB15_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB15_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB15_2 +; CHECK-ARM6-NEXT: .LBB15_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB15_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mvn r0, r0 -; CHECK-ARM6-NEXT: mvn r1, #1 -; CHECK-ARM6-NEXT: orr r0, r0, r1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI15_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB15_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB15_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI15_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI15_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_nand_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB15_1 ; CHECK-THUMB2-NEXT: .LBB15_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r1, [r2] -; CHECK-THUMB2-NEXT: mov r0, r1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB15_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mvn r0, #1 +; CHECK-THUMB2-NEXT: orn r12, r0, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB15_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB15_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB15_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB15_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB15_2 +; CHECK-THUMB2-NEXT: .LBB15_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB15_1 Depth=1 ; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movw r0, #65534 -; CHECK-THUMB2-NEXT: orn r1, r0, r1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB15_1 -; CHECK-THUMB2-NEXT: b .LBB15_2 -; CHECK-THUMB2-NEXT: .LBB15_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB15_5 +; CHECK-THUMB2-NEXT: .LBB15_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_nand_i16: @@ -1402,70 +2393,131 @@ define i16 @test_or_i16() { ; CHECK-ARM8-LABEL: test_or_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB16_1 -; CHECK-ARM8-NEXT: .LBB16_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB16_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB16_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: orr r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB16_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB16_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB16_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB16_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB16_2 +; CHECK-ARM8-NEXT: .LBB16_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB16_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, #1 -; CHECK-ARM8-NEXT: orr r0, r0, r1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB16_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB16_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_or_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB16_1 -; CHECK-ARM6-NEXT: .LBB16_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI16_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB16_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB16_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: orr r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI16_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB16_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB16_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB16_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB16_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB16_2 +; CHECK-ARM6-NEXT: .LBB16_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB16_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: orr r0, r0, #1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI16_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB16_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB16_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI16_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI16_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_or_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB16_1 ; CHECK-THUMB2-NEXT: .LBB16_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: orr r1, r0, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB16_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: orr r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB16_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB16_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB16_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB16_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB16_2 +; CHECK-THUMB2-NEXT: .LBB16_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB16_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB16_1 -; CHECK-THUMB2-NEXT: b .LBB16_2 -; CHECK-THUMB2-NEXT: .LBB16_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB16_5 +; CHECK-THUMB2-NEXT: .LBB16_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_or_i16: @@ -1486,69 +2538,131 @@ define i16 @test_xor_i16() { ; CHECK-ARM8-LABEL: test_xor_i16: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB17_1 -; CHECK-ARM8-NEXT: .LBB17_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r0, [r0] +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB17_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB17_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: eor r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB17_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB17_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB17_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB17_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB17_2 +; CHECK-ARM8-NEXT: .LBB17_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB17_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: eor r0, r0, #1 -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB17_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB17_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xor_i16: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB17_1 -; CHECK-ARM6-NEXT: .LBB17_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI17_0 -; CHECK-ARM6-NEXT: ldrexh r0, [r0] +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB17_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB17_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: eor r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI17_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB17_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB17_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB17_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB17_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB17_2 +; CHECK-ARM6-NEXT: .LBB17_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB17_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: eor r0, r0, #1 -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI17_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB17_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB17_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI17_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI17_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_xor_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB17_1 ; CHECK-THUMB2-NEXT: .LBB17_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: eor r1, r0, #1 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB17_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: eor r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB17_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB17_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB17_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB17_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB17_2 +; CHECK-THUMB2-NEXT: .LBB17_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB17_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB17_1 -; CHECK-THUMB2-NEXT: b .LBB17_2 -; CHECK-THUMB2-NEXT: .LBB17_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB17_5 +; CHECK-THUMB2-NEXT: .LBB17_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xor_i16: @@ -1570,26 +2684,43 @@ ; CHECK-ARM8-LABEL: test_max_i16: ; CHECK-ARM8: @ %bb.0: @ %entry ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB18_1 -; CHECK-ARM8-NEXT: .LBB18_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r1, [r0] -; CHECK-ARM8-NEXT: sxth r2, r1 -; CHECK-ARM8-NEXT: mov r0, r2 +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB18_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB18_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sxth r0, r1 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r0, #1 +; CHECK-ARM8-NEXT: movgt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB18_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB18_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB18_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB18_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB18_2 +; CHECK-ARM8-NEXT: .LBB18_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB18_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 -; CHECK-ARM8-NEXT: cmp r2, #1 -; CHECK-ARM8-NEXT: movgt r0, r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB18_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB18_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr @@ -1597,55 +2728,96 @@ ; CHECK-ARM6-LABEL: test_max_i16: ; CHECK-ARM6: @ %bb.0: @ %entry ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB18_1 -; CHECK-ARM6-NEXT: .LBB18_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI18_0 -; CHECK-ARM6-NEXT: ldrexh r1, [r0] -; CHECK-ARM6-NEXT: sxth r2, r1 -; CHECK-ARM6-NEXT: mov r0, r2 +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB18_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB18_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sxth r0, r1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r0, #1 +; CHECK-ARM6-NEXT: movgt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI18_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB18_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB18_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB18_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB18_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB18_2 +; CHECK-ARM6-NEXT: .LBB18_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB18_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 -; CHECK-ARM6-NEXT: cmp r2, #1 -; CHECK-ARM6-NEXT: movgt r0, r1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI18_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB18_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB18_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI18_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI18_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_max_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB18_1 ; CHECK-THUMB2-NEXT: .LBB18_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: sxth r3, r0 -; CHECK-THUMB2-NEXT: mov r1, r3 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r3, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB18_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sxth r0, r1 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r0, #1 ; CHECK-THUMB2-NEXT: it gt -; CHECK-THUMB2-NEXT: movgt r1, r0 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movgt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB18_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB18_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB18_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB18_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB18_2 +; CHECK-THUMB2-NEXT: .LBB18_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB18_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB18_1 -; CHECK-THUMB2-NEXT: b .LBB18_2 -; CHECK-THUMB2-NEXT: .LBB18_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB18_5 +; CHECK-THUMB2-NEXT: .LBB18_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_max_i16: @@ -1667,26 +2839,43 @@ ; CHECK-ARM8-LABEL: test_min_i16: ; CHECK-ARM8: @ %bb.0: @ %entry ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB19_1 -; CHECK-ARM8-NEXT: .LBB19_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r1, [r0] -; CHECK-ARM8-NEXT: sxth r2, r1 -; CHECK-ARM8-NEXT: mov r0, r2 +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB19_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB19_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sxth r0, r1 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r0, #2 +; CHECK-ARM8-NEXT: movlt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: .LBB19_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB19_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB19_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB19_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB19_2 +; CHECK-ARM8-NEXT: .LBB19_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB19_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 -; CHECK-ARM8-NEXT: cmp r2, #2 -; CHECK-ARM8-NEXT: movlt r0, r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB19_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: uxth r1, r1 +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB19_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr @@ -1694,55 +2883,96 @@ ; CHECK-ARM6-LABEL: test_min_i16: ; CHECK-ARM6: @ %bb.0: @ %entry ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB19_1 -; CHECK-ARM6-NEXT: .LBB19_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI19_0 -; CHECK-ARM6-NEXT: ldrexh r1, [r0] -; CHECK-ARM6-NEXT: sxth r2, r1 -; CHECK-ARM6-NEXT: mov r0, r2 +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB19_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB19_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sxth r0, r1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r0, #2 +; CHECK-ARM6-NEXT: movlt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI19_1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: .LBB19_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB19_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB19_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB19_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB19_2 +; CHECK-ARM6-NEXT: .LBB19_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB19_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 -; CHECK-ARM6-NEXT: cmp r2, #2 -; CHECK-ARM6-NEXT: movlt r0, r1 +; CHECK-ARM6-NEXT: uxth r1, r1 +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI19_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB19_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB19_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI19_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI19_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_min_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB19_1 ; CHECK-THUMB2-NEXT: .LBB19_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: sxth r3, r0 -; CHECK-THUMB2-NEXT: mov r1, r3 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r3, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB19_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sxth r0, r1 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r0, #2 ; CHECK-THUMB2-NEXT: it lt -; CHECK-THUMB2-NEXT: movlt r1, r0 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: .LBB19_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB19_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB19_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB19_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB19_2 +; CHECK-THUMB2-NEXT: .LBB19_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB19_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: uxth r1, r1 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB19_1 -; CHECK-THUMB2-NEXT: b .LBB19_2 -; CHECK-THUMB2-NEXT: .LBB19_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB19_5 +; CHECK-THUMB2-NEXT: .LBB19_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_min_i16: @@ -1763,79 +2993,144 @@ define i16 @test_umax_i16() { ; CHECK-ARM8-LABEL: test_umax_i16: ; CHECK-ARM8: @ %bb.0: @ %entry +; CHECK-ARM8-NEXT: push {r11, lr} +; CHECK-ARM8-NEXT: mov r11, sp ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB20_1 -; CHECK-ARM8-NEXT: .LBB20_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB20_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB20_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: uxth r1, r12 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movhi r0, r1 +; CHECK-ARM8-NEXT: movhi lr, r12 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r12, r12 +; CHECK-ARM8-NEXT: .LBB20_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB20_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r12 +; CHECK-ARM8-NEXT: bne .LBB20_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB20_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, lr, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB20_2 +; CHECK-ARM8-NEXT: .LBB20_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB20_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB20_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB20_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: mov sp, r11 +; CHECK-ARM8-NEXT: pop {r11, pc} ; ; CHECK-ARM6-LABEL: test_umax_i16: ; CHECK-ARM6: @ %bb.0: @ %entry +; CHECK-ARM6-NEXT: push {r11, lr} +; CHECK-ARM6-NEXT: mov r11, sp ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB20_1 -; CHECK-ARM6-NEXT: .LBB20_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI20_0 -; CHECK-ARM6-NEXT: ldrexh r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB20_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB20_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: uxth r1, r12 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: movhi r0, r1 +; CHECK-ARM6-NEXT: movhi lr, r12 +; CHECK-ARM6-NEXT: ldr r3, .LCPI20_1 +; CHECK-ARM6-NEXT: uxth r12, r12 +; CHECK-ARM6-NEXT: .LBB20_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB20_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r12 +; CHECK-ARM6-NEXT: bne .LBB20_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB20_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, lr, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB20_2 +; CHECK-ARM6-NEXT: .LBB20_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB20_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI20_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB20_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB20_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: mov sp, r11 +; CHECK-ARM6-NEXT: pop {r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI20_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI20_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_umax_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: push {r7, lr} +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB20_1 ; CHECK-THUMB2-NEXT: .LBB20_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB20_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr.w r12, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: uxth.w r1, r12 +; CHECK-THUMB2-NEXT: mov.w lr, #1 +; CHECK-THUMB2-NEXT: cmp r1, #1 ; CHECK-THUMB2-NEXT: it hi -; CHECK-THUMB2-NEXT: movhi r1, r0 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movhi lr, r12 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth.w r12, r12 +; CHECK-THUMB2-NEXT: .LBB20_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB20_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r12 +; CHECK-THUMB2-NEXT: bne .LBB20_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB20_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, lr, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB20_2 +; CHECK-THUMB2-NEXT: .LBB20_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB20_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB20_1 -; CHECK-THUMB2-NEXT: b .LBB20_2 -; CHECK-THUMB2-NEXT: .LBB20_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB20_5 +; CHECK-THUMB2-NEXT: .LBB20_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #8 +; CHECK-THUMB2-NEXT: pop {r7, pc} ; ; CHECK-THUMB1-LABEL: test_umax_i16: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -1855,79 +3150,144 @@ define i16 @test_umin_i16() { ; CHECK-ARM8-LABEL: test_umin_i16: ; CHECK-ARM8: @ %bb.0: @ %entry +; CHECK-ARM8-NEXT: push {r11, lr} +; CHECK-ARM8-NEXT: mov r11, sp ; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB21_1 -; CHECK-ARM8-NEXT: .LBB21_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: ldrexh r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r0, #1 +; CHECK-ARM8-NEXT: ldrh r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB21_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB21_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: uxth r1, r12 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 -; CHECK-ARM8-NEXT: movlo r0, r1 +; CHECK-ARM8-NEXT: movlo lr, r12 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-ARM8-NEXT: uxth r12, r12 +; CHECK-ARM8-NEXT: .LBB21_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB21_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexh r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r12 +; CHECK-ARM8-NEXT: bne .LBB21_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB21_2 Depth=2 +; CHECK-ARM8-NEXT: strexh r2, lr, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB21_2 +; CHECK-ARM8-NEXT: .LBB21_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB21_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: uxth r1, r0 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-ARM8-NEXT: strexh r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB21_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB21_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: mov sp, r11 +; CHECK-ARM8-NEXT: pop {r11, pc} ; ; CHECK-ARM6-LABEL: test_umin_i16: ; CHECK-ARM6: @ %bb.0: @ %entry +; CHECK-ARM6-NEXT: push {r11, lr} +; CHECK-ARM6-NEXT: mov r11, sp ; CHECK-ARM6-NEXT: sub sp, sp, #8 -; CHECK-ARM6-NEXT: b .LBB21_1 -; CHECK-ARM6-NEXT: .LBB21_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-ARM6-NEXT: ldr r0, .LCPI21_0 -; CHECK-ARM6-NEXT: ldrexh r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r0, #1 +; CHECK-ARM6-NEXT: ldrh r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB21_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB21_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: uxth r1, r12 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 -; CHECK-ARM6-NEXT: movlo r0, r1 +; CHECK-ARM6-NEXT: movlo lr, r12 +; CHECK-ARM6-NEXT: ldr r3, .LCPI21_1 +; CHECK-ARM6-NEXT: uxth r12, r12 +; CHECK-ARM6-NEXT: .LBB21_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB21_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexh r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r12 +; CHECK-ARM6-NEXT: bne .LBB21_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB21_2 Depth=2 +; CHECK-ARM6-NEXT: strexh r2, lr, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB21_2 +; CHECK-ARM6-NEXT: .LBB21_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB21_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: uxth r1, r0 -; CHECK-ARM6-NEXT: ldr r2, .LCPI21_0 -; CHECK-ARM6-NEXT: strexh r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB21_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB21_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: mov sp, r11 +; CHECK-ARM6-NEXT: pop {r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI21_0: ; CHECK-ARM6-NEXT: .long atomic_i16 +; CHECK-ARM6-NEXT: .LCPI21_1: +; CHECK-ARM6-NEXT: .long atomic_i16 ; ; CHECK-THUMB2-LABEL: test_umin_i16: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: push {r7, lr} +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: ldrh r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB21_1 ; CHECK-THUMB2-NEXT: .LBB21_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i16 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i16 -; CHECK-THUMB2-NEXT: ldrexh r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB21_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr.w r12, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: uxth.w r1, r12 +; CHECK-THUMB2-NEXT: mov.w lr, #1 +; CHECK-THUMB2-NEXT: cmp r1, #2 ; CHECK-THUMB2-NEXT: it lo -; CHECK-THUMB2-NEXT: movlo r1, r0 -; CHECK-THUMB2-NEXT: strexh r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlo lr, r12 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i16 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i16 +; CHECK-THUMB2-NEXT: uxth.w r12, r12 +; CHECK-THUMB2-NEXT: .LBB21_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB21_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexh r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r12 +; CHECK-THUMB2-NEXT: bne .LBB21_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB21_2 Depth=2 +; CHECK-THUMB2-NEXT: strexh r2, lr, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB21_2 +; CHECK-THUMB2-NEXT: .LBB21_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB21_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB21_1 -; CHECK-THUMB2-NEXT: b .LBB21_2 -; CHECK-THUMB2-NEXT: .LBB21_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB21_5 +; CHECK-THUMB2-NEXT: .LBB21_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #8 +; CHECK-THUMB2-NEXT: pop {r7, pc} ; ; CHECK-THUMB1-LABEL: test_umin_i16: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -1949,66 +3309,125 @@ define i32 @test_xchg_i32() { ; CHECK-ARM8-LABEL: test_xchg_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB22_1 -; CHECK-ARM8-NEXT: .LBB22_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB22_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB22_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: .LBB22_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB22_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB22_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB22_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB22_2 +; CHECK-ARM8-NEXT: .LBB22_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB22_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: mov r1, #1 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB22_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB22_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xchg_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB22_1 -; CHECK-ARM6-NEXT: .LBB22_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI22_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB22_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB22_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r3, .LCPI22_1 +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: .LBB22_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB22_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB22_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB22_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB22_2 +; CHECK-ARM6-NEXT: .LBB22_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB22_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI22_0 -; CHECK-ARM6-NEXT: mov r1, #1 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB22_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB22_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI22_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI22_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_xchg_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB22_1 ; CHECK-THUMB2-NEXT: .LBB22_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB22_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: .LBB22_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB22_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB22_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB22_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB22_2 +; CHECK-THUMB2-NEXT: .LBB22_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB22_1 Depth=1 ; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB22_1 -; CHECK-THUMB2-NEXT: b .LBB22_2 -; CHECK-THUMB2-NEXT: .LBB22_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB22_5 +; CHECK-THUMB2-NEXT: .LBB22_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xchg_i32: @@ -2029,67 +3448,125 @@ define i32 @test_add_i32() { ; CHECK-ARM8-LABEL: test_add_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB23_1 -; CHECK-ARM8-NEXT: .LBB23_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB23_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB23_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: add r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB23_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB23_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB23_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB23_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB23_2 +; CHECK-ARM8-NEXT: .LBB23_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB23_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: add r1, r0, #1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB23_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB23_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_add_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB23_1 -; CHECK-ARM6-NEXT: .LBB23_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI23_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB23_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB23_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: add r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI23_1 +; CHECK-ARM6-NEXT: .LBB23_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB23_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB23_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB23_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB23_2 +; CHECK-ARM6-NEXT: .LBB23_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB23_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: add r1, r0, #1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI23_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB23_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB23_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI23_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI23_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_add_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB23_1 ; CHECK-THUMB2-NEXT: .LBB23_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: adds r1, r0, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB23_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: add.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB23_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB23_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB23_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB23_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB23_2 +; CHECK-THUMB2-NEXT: .LBB23_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB23_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB23_1 -; CHECK-THUMB2-NEXT: b .LBB23_2 -; CHECK-THUMB2-NEXT: .LBB23_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB23_5 +; CHECK-THUMB2-NEXT: .LBB23_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_add_i32: @@ -2110,67 +3587,125 @@ define i32 @test_sub_i32() { ; CHECK-ARM8-LABEL: test_sub_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB24_1 -; CHECK-ARM8-NEXT: .LBB24_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB24_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB24_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sub r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB24_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB24_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB24_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB24_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB24_2 +; CHECK-ARM8-NEXT: .LBB24_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB24_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: sub r1, r0, #1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB24_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB24_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_sub_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB24_1 -; CHECK-ARM6-NEXT: .LBB24_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI24_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB24_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB24_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sub r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI24_1 +; CHECK-ARM6-NEXT: .LBB24_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB24_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB24_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB24_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB24_2 +; CHECK-ARM6-NEXT: .LBB24_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB24_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: sub r1, r0, #1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI24_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB24_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB24_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI24_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI24_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_sub_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB24_1 ; CHECK-THUMB2-NEXT: .LBB24_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: subs r1, r0, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB24_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: sub.w r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB24_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB24_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB24_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB24_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB24_2 +; CHECK-THUMB2-NEXT: .LBB24_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB24_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB24_1 -; CHECK-THUMB2-NEXT: b .LBB24_2 -; CHECK-THUMB2-NEXT: .LBB24_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB24_5 +; CHECK-THUMB2-NEXT: .LBB24_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_sub_i32: @@ -2191,67 +3726,125 @@ define i32 @test_and_i32() { ; CHECK-ARM8-LABEL: test_and_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB25_1 -; CHECK-ARM8-NEXT: .LBB25_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB25_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB25_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: and r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB25_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB25_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB25_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB25_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB25_2 +; CHECK-ARM8-NEXT: .LBB25_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB25_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r1, r0, #1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB25_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB25_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_and_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB25_1 -; CHECK-ARM6-NEXT: .LBB25_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI25_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB25_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB25_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: and r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI25_1 +; CHECK-ARM6-NEXT: .LBB25_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB25_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB25_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB25_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB25_2 +; CHECK-ARM6-NEXT: .LBB25_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB25_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r1, r0, #1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI25_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB25_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB25_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI25_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI25_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_and_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB25_1 ; CHECK-THUMB2-NEXT: .LBB25_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: and r1, r0, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB25_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: and r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB25_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB25_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB25_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB25_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB25_2 +; CHECK-THUMB2-NEXT: .LBB25_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB25_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB25_1 -; CHECK-THUMB2-NEXT: b .LBB25_2 -; CHECK-THUMB2-NEXT: .LBB25_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB25_5 +; CHECK-THUMB2-NEXT: .LBB25_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_and_i32: @@ -2272,74 +3865,130 @@ define i32 @test_nand_i32() { ; CHECK-ARM8-LABEL: test_nand_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB26_1 -; CHECK-ARM8-NEXT: .LBB26_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB26_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB26_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mvn r0, r1 +; CHECK-ARM8-NEXT: mvn r2, #1 +; CHECK-ARM8-NEXT: orr r12, r0, r2 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB26_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB26_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB26_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB26_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB26_2 +; CHECK-ARM8-NEXT: .LBB26_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB26_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r0, r0, #1 -; CHECK-ARM8-NEXT: mvn r1, #0 -; CHECK-ARM8-NEXT: eor r1, r0, r1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB26_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB26_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_nand_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB26_1 -; CHECK-ARM6-NEXT: .LBB26_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI26_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB26_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB26_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mvn r0, r1 +; CHECK-ARM6-NEXT: mvn r2, #1 +; CHECK-ARM6-NEXT: orr r12, r0, r2 +; CHECK-ARM6-NEXT: ldr r3, .LCPI26_1 +; CHECK-ARM6-NEXT: .LBB26_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB26_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB26_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB26_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB26_2 +; CHECK-ARM6-NEXT: .LBB26_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB26_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r0, r0, #1 -; CHECK-ARM6-NEXT: ldr r1, .LCPI26_1 -; CHECK-ARM6-NEXT: eor r1, r0, r1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI26_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB26_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB26_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI26_0: ; CHECK-ARM6-NEXT: .long atomic_i32 ; CHECK-ARM6-NEXT: .LCPI26_1: -; CHECK-ARM6-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_nand_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB26_1 ; CHECK-THUMB2-NEXT: .LBB26_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r1, [r2] -; CHECK-THUMB2-NEXT: mov r0, r1 -; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB26_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-THUMB2-NEXT: mvn r0, #1 -; CHECK-THUMB2-NEXT: orn r1, r0, r1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: orn r12, r0, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB26_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB26_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB26_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB26_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB26_2 +; CHECK-THUMB2-NEXT: .LBB26_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB26_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB26_1 -; CHECK-THUMB2-NEXT: b .LBB26_2 -; CHECK-THUMB2-NEXT: .LBB26_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB26_5 +; CHECK-THUMB2-NEXT: .LBB26_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_nand_i32: @@ -2360,67 +4009,125 @@ define i32 @test_or_i32() { ; CHECK-ARM8-LABEL: test_or_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB27_1 -; CHECK-ARM8-NEXT: .LBB27_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB27_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: orr r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB27_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB27_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB27_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB27_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB27_2 +; CHECK-ARM8-NEXT: .LBB27_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB27_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: orr r1, r0, #1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB27_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB27_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_or_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB27_1 -; CHECK-ARM6-NEXT: .LBB27_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI27_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB27_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: orr r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI27_1 +; CHECK-ARM6-NEXT: .LBB27_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB27_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB27_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB27_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB27_2 +; CHECK-ARM6-NEXT: .LBB27_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB27_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: orr r1, r0, #1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI27_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB27_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB27_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI27_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI27_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_or_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB27_1 ; CHECK-THUMB2-NEXT: .LBB27_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: orr r1, r0, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB27_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: orr r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB27_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB27_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB27_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB27_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB27_2 +; CHECK-THUMB2-NEXT: .LBB27_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB27_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB27_1 -; CHECK-THUMB2-NEXT: b .LBB27_2 -; CHECK-THUMB2-NEXT: .LBB27_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB27_5 +; CHECK-THUMB2-NEXT: .LBB27_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_or_i32: @@ -2441,67 +4148,125 @@ define i32 @test_xor_i32() { ; CHECK-ARM8-LABEL: test_xor_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #4 -; CHECK-ARM8-NEXT: b .LBB28_1 -; CHECK-ARM8-NEXT: .LBB28_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r0, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB28_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB28_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: eor r12, r1, #1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB28_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB28_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB28_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB28_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB28_2 +; CHECK-ARM8-NEXT: .LBB28_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB28_1 Depth=1 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: eor r1, r0, #1 -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB28_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB28_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #4 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_xor_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #4 -; CHECK-ARM6-NEXT: b .LBB28_1 -; CHECK-ARM6-NEXT: .LBB28_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI28_0 -; CHECK-ARM6-NEXT: ldrex r0, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB28_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB28_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: eor r12, r1, #1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI28_1 +; CHECK-ARM6-NEXT: .LBB28_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB28_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB28_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB28_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB28_2 +; CHECK-ARM6-NEXT: .LBB28_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB28_1 Depth=1 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: eor r1, r0, #1 -; CHECK-ARM6-NEXT: ldr r2, .LCPI28_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB28_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB28_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #4 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI28_0: ; CHECK-ARM6-NEXT: .long atomic_i32 +; CHECK-ARM6-NEXT: .LCPI28_1: +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_xor_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB28_1 ; CHECK-THUMB2-NEXT: .LBB28_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: eor r1, r0, #1 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB28_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: eor r12, r1, #1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB28_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB28_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB28_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB28_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB28_2 +; CHECK-THUMB2-NEXT: .LBB28_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB28_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB28_1 -; CHECK-THUMB2-NEXT: b .LBB28_2 -; CHECK-THUMB2-NEXT: .LBB28_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB28_5 +; CHECK-THUMB2-NEXT: .LBB28_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_xor_i32: @@ -2522,84 +4287,132 @@ define i32 @test_max_i32() { ; CHECK-ARM8-LABEL: test_max_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #12 -; CHECK-ARM8-NEXT: b .LBB29_1 -; CHECK-ARM8-NEXT: .LBB29_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB29_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB29_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movw r0, #0 -; CHECK-ARM8-NEXT: movgt r0, #1 +; CHECK-ARM8-NEXT: movgt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB29_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB29_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB29_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB29_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB29_2 +; CHECK-ARM8-NEXT: .LBB29_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB29_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: tst r0, #1 -; CHECK-ARM8-NEXT: moveq r1, #1 -; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB29_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB29_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #12 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_max_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #12 -; CHECK-ARM6-NEXT: b .LBB29_1 -; CHECK-ARM6-NEXT: .LBB29_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI29_0 -; CHECK-ARM6-NEXT: ldrex r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB29_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB29_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI29_1 -; CHECK-ARM6-NEXT: movgt r0, #1 +; CHECK-ARM6-NEXT: movgt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI29_1 +; CHECK-ARM6-NEXT: .LBB29_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB29_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB29_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB29_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB29_2 +; CHECK-ARM6-NEXT: .LBB29_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB29_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: tst r0, #1 -; CHECK-ARM6-NEXT: moveq r1, #1 -; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI29_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB29_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB29_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #12 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI29_0: ; CHECK-ARM6-NEXT: .long atomic_i32 ; CHECK-ARM6-NEXT: .LCPI29_1: -; CHECK-ARM6-NEXT: .long 0 @ 0x0 +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_max_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB29_1 ; CHECK-THUMB2-NEXT: .LBB29_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB29_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r1, #1 ; CHECK-THUMB2-NEXT: it gt -; CHECK-THUMB2-NEXT: movgt r1, r0 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movgt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB29_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB29_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB29_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB29_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB29_2 +; CHECK-THUMB2-NEXT: .LBB29_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB29_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB29_1 -; CHECK-THUMB2-NEXT: b .LBB29_2 -; CHECK-THUMB2-NEXT: .LBB29_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB29_5 +; CHECK-THUMB2-NEXT: .LBB29_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_max_i32: @@ -2620,84 +4433,132 @@ define i32 @test_min_i32() { ; CHECK-ARM8-LABEL: test_min_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #12 -; CHECK-ARM8-NEXT: b .LBB30_1 -; CHECK-ARM8-NEXT: .LBB30_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movw r0, #0 -; CHECK-ARM8-NEXT: movle r0, #1 +; CHECK-ARM8-NEXT: ldr r0, [r0] ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: tst r0, #1 -; CHECK-ARM8-NEXT: moveq r1, #1 -; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB30_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: .LBB30_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB30_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: movlt r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB30_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB30_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB30_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB30_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB30_2 +; CHECK-ARM8-NEXT: .LBB30_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB30_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB30_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #12 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_min_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #12 -; CHECK-ARM6-NEXT: b .LBB30_1 -; CHECK-ARM6-NEXT: .LBB30_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI30_0 -; CHECK-ARM6-NEXT: ldrex r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI30_1 -; CHECK-ARM6-NEXT: movle r0, #1 +; CHECK-ARM6-NEXT: ldr r0, [r0] ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: tst r0, #1 -; CHECK-ARM6-NEXT: moveq r1, #1 -; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI30_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB30_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: .LBB30_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB30_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: movlt r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI30_1 +; CHECK-ARM6-NEXT: .LBB30_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB30_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB30_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB30_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB30_2 +; CHECK-ARM6-NEXT: .LBB30_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB30_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB30_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #12 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI30_0: ; CHECK-ARM6-NEXT: .long atomic_i32 ; CHECK-ARM6-NEXT: .LCPI30_1: -; CHECK-ARM6-NEXT: .long 0 @ 0x0 +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_min_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB30_1 ; CHECK-THUMB2-NEXT: .LBB30_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB30_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r1, #2 ; CHECK-THUMB2-NEXT: it lt -; CHECK-THUMB2-NEXT: movlt r1, r0 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlt r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB30_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB30_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB30_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB30_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB30_2 +; CHECK-THUMB2-NEXT: .LBB30_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB30_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB30_1 -; CHECK-THUMB2-NEXT: b .LBB30_2 -; CHECK-THUMB2-NEXT: .LBB30_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB30_5 +; CHECK-THUMB2-NEXT: .LBB30_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_min_i32: @@ -2718,84 +4579,132 @@ define i32 @test_umax_i32() { ; CHECK-ARM8-LABEL: test_umax_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #12 -; CHECK-ARM8-NEXT: b .LBB31_1 -; CHECK-ARM8-NEXT: .LBB31_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: ldr r0, [r0] +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB31_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movw r0, #0 -; CHECK-ARM8-NEXT: movhi r0, #1 +; CHECK-ARM8-NEXT: movhi r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB31_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB31_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB31_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB31_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB31_2 +; CHECK-ARM8-NEXT: .LBB31_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB31_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: tst r0, #1 -; CHECK-ARM8-NEXT: moveq r1, #1 -; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB31_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: beq .LBB31_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #12 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_umax_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #12 -; CHECK-ARM6-NEXT: b .LBB31_1 -; CHECK-ARM6-NEXT: .LBB31_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI31_0 -; CHECK-ARM6-NEXT: ldrex r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: ldr r0, [r0] +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB31_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI31_1 -; CHECK-ARM6-NEXT: movhi r0, #1 +; CHECK-ARM6-NEXT: movhi r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI31_1 +; CHECK-ARM6-NEXT: .LBB31_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB31_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB31_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB31_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB31_2 +; CHECK-ARM6-NEXT: .LBB31_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB31_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: tst r0, #1 -; CHECK-ARM6-NEXT: moveq r1, #1 -; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI31_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB31_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: beq .LBB31_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #12 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI31_0: ; CHECK-ARM6-NEXT: .long atomic_i32 ; CHECK-ARM6-NEXT: .LCPI31_1: -; CHECK-ARM6-NEXT: .long 0 @ 0x0 +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_umax_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB31_1 ; CHECK-THUMB2-NEXT: .LBB31_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB31_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r1, #1 ; CHECK-THUMB2-NEXT: it hi -; CHECK-THUMB2-NEXT: movhi r1, r0 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movhi r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB31_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB31_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB31_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB31_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB31_2 +; CHECK-THUMB2-NEXT: .LBB31_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB31_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB31_1 -; CHECK-THUMB2-NEXT: b .LBB31_2 -; CHECK-THUMB2-NEXT: .LBB31_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB31_5 +; CHECK-THUMB2-NEXT: .LBB31_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_umax_i32: @@ -2816,84 +4725,132 @@ define i32 @test_umin_i32() { ; CHECK-ARM8-LABEL: test_umin_i32: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #12 -; CHECK-ARM8-NEXT: b .LBB32_1 -; CHECK-ARM8-NEXT: .LBB32_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: sub sp, sp, #8 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: ldrex r1, [r0] -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: cmp r1, #1 -; CHECK-ARM8-NEXT: movw r0, #0 -; CHECK-ARM8-NEXT: movls r0, #1 +; CHECK-ARM8-NEXT: ldr r0, [r0] ; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: tst r0, #1 -; CHECK-ARM8-NEXT: moveq r1, #1 -; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-ARM8-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-ARM8-NEXT: strex r0, r1, [r2] -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: bne .LBB32_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: .LBB32_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB32_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r12, #1 +; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: movlo r12, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-ARM8-NEXT: .LBB32_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB32_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrex r0, [r3] +; CHECK-ARM8-NEXT: cmp r0, r1 +; CHECK-ARM8-NEXT: bne .LBB32_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB32_2 Depth=2 +; CHECK-ARM8-NEXT: strex r2, r12, [r3] +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: bne .LBB32_2 +; CHECK-ARM8-NEXT: .LBB32_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB32_1 Depth=1 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: sub r1, r0, r1 +; CHECK-ARM8-NEXT: clz r1, r1 +; CHECK-ARM8-NEXT: lsr r1, r1, #5 +; CHECK-ARM8-NEXT: tst r1, #1 +; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: beq .LBB32_1 +; CHECK-ARM8-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #12 +; CHECK-ARM8-NEXT: add sp, sp, #8 ; CHECK-ARM8-NEXT: bx lr ; ; CHECK-ARM6-LABEL: test_umin_i32: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #12 -; CHECK-ARM6-NEXT: b .LBB32_1 -; CHECK-ARM6-NEXT: .LBB32_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM6-NEXT: sub sp, sp, #8 ; CHECK-ARM6-NEXT: ldr r0, .LCPI32_0 -; CHECK-ARM6-NEXT: ldrex r1, [r0] -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: cmp r1, #1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI32_1 -; CHECK-ARM6-NEXT: movls r0, #1 +; CHECK-ARM6-NEXT: ldr r0, [r0] ; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: tst r0, #1 -; CHECK-ARM6-NEXT: moveq r1, #1 -; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r2, .LCPI32_0 -; CHECK-ARM6-NEXT: strex r0, r1, [r2] -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: bne .LBB32_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: .LBB32_1: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB32_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r12, #1 +; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: movlo r12, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI32_1 +; CHECK-ARM6-NEXT: .LBB32_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB32_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrex r0, [r3] +; CHECK-ARM6-NEXT: cmp r0, r1 +; CHECK-ARM6-NEXT: bne .LBB32_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB32_2 Depth=2 +; CHECK-ARM6-NEXT: strex r2, r12, [r3] +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: bne .LBB32_2 +; CHECK-ARM6-NEXT: .LBB32_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB32_1 Depth=1 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: sub r1, r0, r1 +; CHECK-ARM6-NEXT: clz r1, r1 +; CHECK-ARM6-NEXT: lsr r1, r1, #5 +; CHECK-ARM6-NEXT: tst r1, #1 +; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: beq .LBB32_1 +; CHECK-ARM6-NEXT: @ %bb.5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #12 +; CHECK-ARM6-NEXT: add sp, sp, #8 ; CHECK-ARM6-NEXT: bx lr ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI32_0: ; CHECK-ARM6-NEXT: .long atomic_i32 ; CHECK-ARM6-NEXT: .LCPI32_1: -; CHECK-ARM6-NEXT: .long 0 @ 0x0 +; CHECK-ARM6-NEXT: .long atomic_i32 ; ; CHECK-THUMB2-LABEL: test_umin_i32: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #4 +; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: ldr r0, [r0] +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB32_1 ; CHECK-THUMB2-NEXT: .LBB32_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB2-NEXT: movw r2, :lower16:atomic_i32 -; CHECK-THUMB2-NEXT: movt r2, :upper16:atomic_i32 -; CHECK-THUMB2-NEXT: ldrex r0, [r2] -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: cmp r0, #2 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB32_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov.w r12, #1 +; CHECK-THUMB2-NEXT: cmp r1, #2 ; CHECK-THUMB2-NEXT: it lo -; CHECK-THUMB2-NEXT: movlo r1, r0 -; CHECK-THUMB2-NEXT: strex r0, r1, [r2] -; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: movlo r12, r1 +; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i32 +; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i32 +; CHECK-THUMB2-NEXT: .LBB32_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB32_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrex r0, [r3] +; CHECK-THUMB2-NEXT: cmp r0, r1 +; CHECK-THUMB2-NEXT: bne .LBB32_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB32_2 Depth=2 +; CHECK-THUMB2-NEXT: strex r2, r12, [r3] +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: bne .LBB32_2 +; CHECK-THUMB2-NEXT: .LBB32_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB32_1 Depth=1 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: subs r1, r0, r1 +; CHECK-THUMB2-NEXT: clz r1, r1 +; CHECK-THUMB2-NEXT: lsrs r1, r1, #5 +; CHECK-THUMB2-NEXT: cmp r1, #1 +; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB32_1 -; CHECK-THUMB2-NEXT: b .LBB32_2 -; CHECK-THUMB2-NEXT: .LBB32_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB32_5 +; CHECK-THUMB2-NEXT: .LBB32_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #4 +; CHECK-THUMB2-NEXT: add sp, #8 ; CHECK-THUMB2-NEXT: bx lr ; ; CHECK-THUMB1-LABEL: test_umin_i32: @@ -2917,84 +4874,176 @@ define i64 @test_xchg_i64() { ; CHECK-ARM8-LABEL: test_xchg_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB33_1 -; CHECK-ARM8-NEXT: .LBB33_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM8-NEXT: mov r2, r1 -; CHECK-ARM8-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB33_1 +; CHECK-ARM8-NEXT: .LBB33_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB33_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 ; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: mov r2, #1 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM8-NEXT: mov r8, #1 +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: .LBB33_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB33_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB33_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB33_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB33_2 +; CHECK-ARM8-NEXT: .LBB33_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB33_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 +; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB33_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: b .LBB33_5 +; CHECK-ARM8-NEXT: .LBB33_5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_xchg_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI33_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB33_1 ; CHECK-ARM6-NEXT: .LBB33_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI33_0 -; CHECK-ARM6-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM6-NEXT: mov r2, r1 -; CHECK-ARM6-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: ldr r1, .LCPI33_0 +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB33_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI33_0 ; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: mov r2, #1 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM6-NEXT: mov r8, #1 +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: .LBB33_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB33_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB33_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB33_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB33_2 +; CHECK-ARM6-NEXT: .LBB33_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB33_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 +; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB33_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: b .LBB33_5 +; CHECK-ARM6-NEXT: .LBB33_5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI33_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_xchg_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB33_1 ; CHECK-THUMB2-NEXT: .LBB33_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB33_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r0, r1, [r3] -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: movs r2, #0 -; CHECK-THUMB2-NEXT: movs r1, #1 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: movs r0, #0 +; CHECK-THUMB2-NEXT: mov.w r8, #1 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r0 +; CHECK-THUMB2-NEXT: .LBB33_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB33_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB33_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB33_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB33_2 +; CHECK-THUMB2-NEXT: .LBB33_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB33_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB33_1 -; CHECK-THUMB2-NEXT: b .LBB33_2 -; CHECK-THUMB2-NEXT: .LBB33_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB33_5 +; CHECK-THUMB2-NEXT: .LBB33_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_xchg_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3015,86 +5064,176 @@ define i64 @test_add_i64() { ; CHECK-ARM8-LABEL: test_add_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB34_1 -; CHECK-ARM8-NEXT: .LBB34_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM8-NEXT: mov r0, r3 +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB34_1 +; CHECK-ARM8-NEXT: .LBB34_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB34_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: adds r8, r2, #1 +; CHECK-ARM8-NEXT: adc r0, r1, #0 +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB34_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB34_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB34_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB34_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] +; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB34_2 +; CHECK-ARM8-NEXT: .LBB34_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB34_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r1, r2 +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 ; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: adds r2, r1, #1 -; CHECK-ARM8-NEXT: adc r0, r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB34_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: b .LBB34_5 +; CHECK-ARM8-NEXT: .LBB34_5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_add_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI34_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB34_1 ; CHECK-ARM6-NEXT: .LBB34_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI34_0 -; CHECK-ARM6-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM6-NEXT: mov r0, r3 +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB34_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: adds r8, r2, #1 +; CHECK-ARM6-NEXT: adc r0, r1, #0 +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: ldr r3, .LCPI34_0 +; CHECK-ARM6-NEXT: .LBB34_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB34_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB34_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB34_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] +; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB34_2 +; CHECK-ARM6-NEXT: .LBB34_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB34_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r1, r2 +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 ; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: adds r2, r1, #1 -; CHECK-ARM6-NEXT: adc r0, r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI34_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB34_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: b .LBB34_5 +; CHECK-ARM6-NEXT: .LBB34_5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI34_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_add_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB34_1 ; CHECK-THUMB2-NEXT: .LBB34_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB34_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 +; CHECK-THUMB2-NEXT: adds.w r8, r2, #1 +; CHECK-THUMB2-NEXT: adc r0, r1, #0 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r0 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r1, r0, [r3] -; CHECK-THUMB2-NEXT: mov r2, r0 -; CHECK-THUMB2-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r2, r1 -; CHECK-THUMB2-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: adds r1, #1 -; CHECK-THUMB2-NEXT: adc r2, r0, #0 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB34_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB34_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB34_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB34_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB34_2 +; CHECK-THUMB2-NEXT: .LBB34_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB34_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB34_1 -; CHECK-THUMB2-NEXT: b .LBB34_2 -; CHECK-THUMB2-NEXT: .LBB34_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB34_5 +; CHECK-THUMB2-NEXT: .LBB34_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_add_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3115,86 +5254,176 @@ define i64 @test_sub_i64() { ; CHECK-ARM8-LABEL: test_sub_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB35_1 -; CHECK-ARM8-NEXT: .LBB35_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM8-NEXT: mov r0, r3 +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB35_1 +; CHECK-ARM8-NEXT: .LBB35_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB35_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: subs r8, r2, #1 +; CHECK-ARM8-NEXT: sbc r0, r1, #0 +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB35_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB35_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB35_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB35_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] +; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB35_2 +; CHECK-ARM8-NEXT: .LBB35_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB35_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 ; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r1, r2 +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 ; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: subs r2, r1, #1 -; CHECK-ARM8-NEXT: sbc r0, r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB35_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: b .LBB35_5 +; CHECK-ARM8-NEXT: .LBB35_5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_sub_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI35_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB35_1 ; CHECK-ARM6-NEXT: .LBB35_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI35_0 -; CHECK-ARM6-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM6-NEXT: mov r0, r3 +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB35_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: subs r8, r2, #1 +; CHECK-ARM6-NEXT: sbc r0, r1, #0 +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: ldr r3, .LCPI35_0 +; CHECK-ARM6-NEXT: .LBB35_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB35_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB35_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB35_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] +; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB35_2 +; CHECK-ARM6-NEXT: .LBB35_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB35_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 ; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r1, r2 +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 ; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: subs r2, r1, #1 -; CHECK-ARM6-NEXT: sbc r0, r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI35_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB35_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: b .LBB35_5 +; CHECK-ARM6-NEXT: .LBB35_5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI35_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_sub_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB35_1 ; CHECK-THUMB2-NEXT: .LBB35_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB35_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 +; CHECK-THUMB2-NEXT: subs.w r8, r2, #1 +; CHECK-THUMB2-NEXT: sbc r0, r1, #0 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r0 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r1, r0, [r3] -; CHECK-THUMB2-NEXT: mov r2, r0 -; CHECK-THUMB2-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r2, r1 -; CHECK-THUMB2-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: subs r1, #1 -; CHECK-THUMB2-NEXT: sbc r2, r0, #0 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB35_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB35_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB35_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB35_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB35_2 +; CHECK-THUMB2-NEXT: .LBB35_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB35_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB35_1 -; CHECK-THUMB2-NEXT: b .LBB35_2 -; CHECK-THUMB2-NEXT: .LBB35_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB35_5 +; CHECK-THUMB2-NEXT: .LBB35_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_sub_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3215,85 +5444,176 @@ define i64 @test_and_i64() { ; CHECK-ARM8-LABEL: test_and_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB36_1 -; CHECK-ARM8-NEXT: .LBB36_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM8-NEXT: mov r2, r1 -; CHECK-ARM8-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: and r2, r0, #1 +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB36_1 +; CHECK-ARM8-NEXT: .LBB36_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB36_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: and r8, r2, #1 ; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB36_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB36_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB36_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB36_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB36_2 +; CHECK-ARM8-NEXT: .LBB36_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB36_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 +; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB36_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: b .LBB36_5 +; CHECK-ARM8-NEXT: .LBB36_5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_and_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI36_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB36_1 ; CHECK-ARM6-NEXT: .LBB36_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI36_0 -; CHECK-ARM6-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM6-NEXT: mov r2, r1 -; CHECK-ARM6-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: and r2, r0, #1 +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB36_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: and r8, r2, #1 ; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI36_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI36_0 +; CHECK-ARM6-NEXT: .LBB36_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB36_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB36_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB36_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB36_2 +; CHECK-ARM6-NEXT: .LBB36_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB36_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 +; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB36_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: b .LBB36_5 +; CHECK-ARM6-NEXT: .LBB36_5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI36_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_and_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB36_1 ; CHECK-THUMB2-NEXT: .LBB36_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB36_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: and r8, r2, #1 +; CHECK-THUMB2-NEXT: movs r0, #0 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r0 +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r0, r1, [r3] -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: and r1, r0, #1 -; CHECK-THUMB2-NEXT: movs r2, #0 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB36_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB36_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB36_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB36_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB36_2 +; CHECK-THUMB2-NEXT: .LBB36_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB36_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB36_1 -; CHECK-THUMB2-NEXT: b .LBB36_2 -; CHECK-THUMB2-NEXT: .LBB36_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB36_5 +; CHECK-THUMB2-NEXT: .LBB36_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_and_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3314,90 +5634,181 @@ define i64 @test_nand_i64() { ; CHECK-ARM8-LABEL: test_nand_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB37_1 -; CHECK-ARM8-NEXT: .LBB37_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM8-NEXT: mov r2, r1 -; CHECK-ARM8-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM8-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: mvn r0, r0 -; CHECK-ARM8-NEXT: mvn r1, #1 -; CHECK-ARM8-NEXT: orr r2, r0, r1 +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB37_1 +; CHECK-ARM8-NEXT: .LBB37_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB37_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mvn r0, r2 +; CHECK-ARM8-NEXT: mvn r3, #1 +; CHECK-ARM8-NEXT: orr r8, r0, r3 ; CHECK-ARM8-NEXT: mvn r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB37_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB37_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB37_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB37_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB37_2 +; CHECK-ARM8-NEXT: .LBB37_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB37_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 +; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB37_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM8-NEXT: b .LBB37_5 +; CHECK-ARM8-NEXT: .LBB37_5: @ %atomicrmw.end ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_nand_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI37_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB37_1 ; CHECK-ARM6-NEXT: .LBB37_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI37_0 -; CHECK-ARM6-NEXT: ldrexd r0, r1, [r0] -; CHECK-ARM6-NEXT: mov r2, r1 -; CHECK-ARM6-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 -; CHECK-ARM6-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: mvn r0, r0 -; CHECK-ARM6-NEXT: mvn r1, #1 -; CHECK-ARM6-NEXT: orr r2, r0, r1 +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB37_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mvn r0, r2 +; CHECK-ARM6-NEXT: mvn r3, #1 +; CHECK-ARM6-NEXT: orr r8, r0, r3 ; CHECK-ARM6-NEXT: mvn r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI37_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: ldr r3, .LCPI37_0 +; CHECK-ARM6-NEXT: .LBB37_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB37_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB37_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB37_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB37_2 +; CHECK-ARM6-NEXT: .LBB37_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB37_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 +; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB37_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM6-NEXT: b .LBB37_5 +; CHECK-ARM6-NEXT: .LBB37_5: @ %atomicrmw.end ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI37_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_nand_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB37_1 ; CHECK-THUMB2-NEXT: .LBB37_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB37_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 +; CHECK-THUMB2-NEXT: mvn r0, #1 +; CHECK-THUMB2-NEXT: orn r8, r0, r2 +; CHECK-THUMB2-NEXT: mov.w r0, #-1 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r0 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r1, r0, [r3] -; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r0, r1 -; CHECK-THUMB2-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mvn r0, #1 -; CHECK-THUMB2-NEXT: orn r1, r0, r1 -; CHECK-THUMB2-NEXT: mov.w r2, #-1 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB37_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB37_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB37_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB37_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB37_2 +; CHECK-THUMB2-NEXT: .LBB37_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB37_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB37_1 -; CHECK-THUMB2-NEXT: b .LBB37_2 -; CHECK-THUMB2-NEXT: .LBB37_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB37_5 +; CHECK-THUMB2-NEXT: .LBB37_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_nand_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3418,85 +5829,173 @@ define i64 @test_or_i64() { ; CHECK-ARM8-LABEL: test_or_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB38_1 -; CHECK-ARM8-NEXT: .LBB38_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM8-NEXT: mov r0, r3 -; CHECK-ARM8-NEXT: mov r1, r2 -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r2, r0 -; CHECK-ARM8-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: orr r2, r1, #1 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB38_1 +; CHECK-ARM8-NEXT: .LBB38_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB38_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: orr r8, r2, #1 +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r1 +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB38_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB38_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB38_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB38_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB38_2 +; CHECK-ARM8-NEXT: .LBB38_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB38_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 +; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB38_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: b .LBB38_5 +; CHECK-ARM8-NEXT: .LBB38_5: @ %atomicrmw.end +; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_or_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI38_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB38_1 ; CHECK-ARM6-NEXT: .LBB38_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI38_0 -; CHECK-ARM6-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM6-NEXT: mov r0, r3 -; CHECK-ARM6-NEXT: mov r1, r2 -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r2, r0 -; CHECK-ARM6-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: orr r2, r1, #1 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI38_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB38_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: orr r8, r2, #1 +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r1 +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI38_0 +; CHECK-ARM6-NEXT: .LBB38_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB38_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB38_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB38_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB38_2 +; CHECK-ARM6-NEXT: .LBB38_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB38_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 +; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB38_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: b .LBB38_5 +; CHECK-ARM6-NEXT: .LBB38_5: @ %atomicrmw.end +; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI38_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_or_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB38_1 ; CHECK-THUMB2-NEXT: .LBB38_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB38_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: orr r8, r2, #1 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r1 +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r0, r2, [r3] -; CHECK-THUMB2-NEXT: mov r1, r2 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: orr r1, r0, #1 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB38_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB38_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB38_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB38_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB38_2 +; CHECK-THUMB2-NEXT: .LBB38_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB38_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB38_1 -; CHECK-THUMB2-NEXT: b .LBB38_2 -; CHECK-THUMB2-NEXT: .LBB38_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB38_5 +; CHECK-THUMB2-NEXT: .LBB38_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_or_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry @@ -3517,85 +6016,173 @@ define i64 @test_xor_i64() { ; CHECK-ARM8-LABEL: test_xor_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: sub sp, sp, #8 -; CHECK-ARM8-NEXT: b .LBB39_1 -; CHECK-ARM8-NEXT: .LBB39_1: @ %atomicrmw.start -; CHECK-ARM8-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: add r11, sp, #24 +; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-ARM8-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM8-NEXT: mov r0, r3 -; CHECK-ARM8-NEXT: mov r1, r2 -; CHECK-ARM8-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM8-NEXT: mov r2, r0 -; CHECK-ARM8-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-ARM8-NEXT: eor r2, r1, #1 -; CHECK-ARM8-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM8-NEXT: mov r3, r0 -; CHECK-ARM8-NEXT: movw r1, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r1, :upper16:atomic_i64 -; CHECK-ARM8-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM8-NEXT: ldr r1, [r0] +; CHECK-ARM8-NEXT: ldr r0, [r0, #4] +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-ARM8-NEXT: b .LBB39_1 +; CHECK-ARM8-NEXT: .LBB39_1: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM8-NEXT: @ Child Loop BB39_2 Depth 2 +; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM8-NEXT: eor r8, r2, #1 +; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: mov r9, r1 +; CHECK-ARM8-NEXT: mov r6, r2 +; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: .LBB39_2: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ Parent Loop BB39_1 Depth=1 +; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM8-NEXT: cmp r4, r6 +; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: bne .LBB39_4 +; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB39_2 Depth=2 +; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: bne .LBB39_2 +; CHECK-ARM8-NEXT: .LBB39_4: @ %atomicrmw.start +; CHECK-ARM8-NEXT: @ in Loop: Header=BB39_1 Depth=1 +; CHECK-ARM8-NEXT: mov r0, r5 +; CHECK-ARM8-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r3, r0, r1 +; CHECK-ARM8-NEXT: mov r1, r4 +; CHECK-ARM8-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM8-NEXT: eor r2, r1, r2 +; CHECK-ARM8-NEXT: orr r2, r2, r3 +; CHECK-ARM8-NEXT: cmp r2, #0 +; CHECK-ARM8-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM8-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM8-NEXT: bne .LBB39_1 -; CHECK-ARM8-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM8-NEXT: add sp, sp, #8 -; CHECK-ARM8-NEXT: bx lr +; CHECK-ARM8-NEXT: b .LBB39_5 +; CHECK-ARM8-NEXT: .LBB39_5: @ %atomicrmw.end +; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-ARM8-NEXT: sub sp, r11, #24 +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-ARM6-LABEL: test_xor_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: sub sp, sp, #8 +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: add r11, sp, #24 +; CHECK-ARM6-NEXT: sub sp, sp, #16 +; CHECK-ARM6-NEXT: ldr r0, .LCPI39_0 +; CHECK-ARM6-NEXT: ldr r1, [r0] +; CHECK-ARM6-NEXT: ldr r0, [r0, #4] +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: b .LBB39_1 ; CHECK-ARM6-NEXT: .LBB39_1: @ %atomicrmw.start -; CHECK-ARM6-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-ARM6-NEXT: ldr r0, .LCPI39_0 -; CHECK-ARM6-NEXT: ldrexd r2, r3, [r0] -; CHECK-ARM6-NEXT: mov r0, r3 -; CHECK-ARM6-NEXT: mov r1, r2 -; CHECK-ARM6-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-ARM6-NEXT: mov r2, r0 -; CHECK-ARM6-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-ARM6-NEXT: eor r2, r1, #1 -; CHECK-ARM6-NEXT: @ kill: def $r2 killed $r2 def $r2_r3 -; CHECK-ARM6-NEXT: mov r3, r0 -; CHECK-ARM6-NEXT: ldr r1, .LCPI39_0 -; CHECK-ARM6-NEXT: strexd r0, r2, r3, [r1] +; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-ARM6-NEXT: @ Child Loop BB39_2 Depth 2 +; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-ARM6-NEXT: eor r8, r2, #1 +; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: mov r9, r1 +; CHECK-ARM6-NEXT: mov r6, r2 +; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: ldr r3, .LCPI39_0 +; CHECK-ARM6-NEXT: .LBB39_2: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ Parent Loop BB39_1 Depth=1 +; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] +; CHECK-ARM6-NEXT: cmp r4, r6 +; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: bne .LBB39_4 +; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB39_2 Depth=2 +; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] ; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: bne .LBB39_2 +; CHECK-ARM6-NEXT: .LBB39_4: @ %atomicrmw.start +; CHECK-ARM6-NEXT: @ in Loop: Header=BB39_1 Depth=1 +; CHECK-ARM6-NEXT: mov r0, r5 +; CHECK-ARM6-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r3, r0, r1 +; CHECK-ARM6-NEXT: mov r1, r4 +; CHECK-ARM6-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-ARM6-NEXT: eor r2, r1, r2 +; CHECK-ARM6-NEXT: orr r2, r2, r3 +; CHECK-ARM6-NEXT: cmp r2, #0 +; CHECK-ARM6-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-ARM6-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-ARM6-NEXT: bne .LBB39_1 -; CHECK-ARM6-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-ARM6-NEXT: add sp, sp, #8 -; CHECK-ARM6-NEXT: bx lr +; CHECK-ARM6-NEXT: b .LBB39_5 +; CHECK-ARM6-NEXT: .LBB39_5: @ %atomicrmw.end +; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-ARM6-NEXT: sub sp, r11, #24 +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 -; CHECK-ARM6-NEXT: @ %bb.3: +; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI39_0: ; CHECK-ARM6-NEXT: .long atomic_i64 ; ; CHECK-THUMB2-LABEL: test_xor_i64: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: sub sp, #8 +; CHECK-THUMB2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB2-NEXT: sub sp, #16 +; CHECK-THUMB2-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-THUMB2-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-THUMB2-NEXT: ldr r1, [r0] +; CHECK-THUMB2-NEXT: ldr r0, [r0, #4] +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: b .LBB39_1 ; CHECK-THUMB2-NEXT: .LBB39_1: @ %atomicrmw.start -; CHECK-THUMB2-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-THUMB2-NEXT: @ Child Loop BB39_2 Depth 2 +; CHECK-THUMB2-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-THUMB2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB2-NEXT: eor r8, r2, #1 +; CHECK-THUMB2-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB2-NEXT: mov r9, r1 +; CHECK-THUMB2-NEXT: mov r6, r2 +; CHECK-THUMB2-NEXT: mov r7, r1 ; CHECK-THUMB2-NEXT: movw r3, :lower16:atomic_i64 ; CHECK-THUMB2-NEXT: movt r3, :upper16:atomic_i64 -; CHECK-THUMB2-NEXT: ldrexd r0, r2, [r3] -; CHECK-THUMB2-NEXT: mov r1, r2 -; CHECK-THUMB2-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-THUMB2-NEXT: mov r1, r0 -; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-THUMB2-NEXT: eor r1, r0, #1 -; CHECK-THUMB2-NEXT: strexd r0, r1, r2, [r3] +; CHECK-THUMB2-NEXT: .LBB39_2: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ Parent Loop BB39_1 Depth=1 +; CHECK-THUMB2-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-THUMB2-NEXT: ldrexd r4, r5, [r3] +; CHECK-THUMB2-NEXT: cmp r4, r6 +; CHECK-THUMB2-NEXT: it eq +; CHECK-THUMB2-NEXT: cmpeq r5, r7 +; CHECK-THUMB2-NEXT: bne .LBB39_4 +; CHECK-THUMB2-NEXT: @ %bb.3: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB39_2 Depth=2 +; CHECK-THUMB2-NEXT: strexd r0, r8, r9, [r3] ; CHECK-THUMB2-NEXT: cmp r0, #0 +; CHECK-THUMB2-NEXT: bne .LBB39_2 +; CHECK-THUMB2-NEXT: .LBB39_4: @ %atomicrmw.start +; CHECK-THUMB2-NEXT: @ in Loop: Header=BB39_1 Depth=1 +; CHECK-THUMB2-NEXT: mov r0, r5 +; CHECK-THUMB2-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eor.w r3, r0, r1 +; CHECK-THUMB2-NEXT: mov r1, r4 +; CHECK-THUMB2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB2-NEXT: eors r2, r1 +; CHECK-THUMB2-NEXT: orrs r2, r3 +; CHECK-THUMB2-NEXT: cmp r2, #0 +; CHECK-THUMB2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB2-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-THUMB2-NEXT: bne .LBB39_1 -; CHECK-THUMB2-NEXT: b .LBB39_2 -; CHECK-THUMB2-NEXT: .LBB39_2: @ %atomicrmw.end +; CHECK-THUMB2-NEXT: b .LBB39_5 +; CHECK-THUMB2-NEXT: .LBB39_5: @ %atomicrmw.end ; CHECK-THUMB2-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-THUMB2-NEXT: add sp, #8 -; CHECK-THUMB2-NEXT: bx lr +; CHECK-THUMB2-NEXT: add sp, #16 +; CHECK-THUMB2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; ; CHECK-THUMB1-LABEL: test_xor_i64: ; CHECK-THUMB1: @ %bb.0: @ %entry