diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -177,7 +177,7 @@ // Explorer for fusion candidates among the dependencies of the anchor instr. for (SDep &Dep : AnchorSU.Preds) { // Ignore dependencies other than data or strong ordering. - if (Dep.isWeak() || isHazard(Dep)) + if (Dep.getKind() != SDep::Data) continue; SUnit &DepSU = *Dep.getSUnit(); diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -206,13 +206,14 @@ ExitSU.setInstr(ExitMI); // Add dependencies on the defs and uses of the instruction. if (ExitMI) { - for (const MachineOperand &MO : ExitMI->operands()) { + for (unsigned I = 0, N = ExitMI->getNumOperands(); I < N; ++I) { + const MachineOperand &MO = ExitMI->getOperand(I); if (!MO.isReg() || MO.isDef()) continue; Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { - Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); + Uses.insert(PhysRegSUOper(&ExitSU, I, Reg)); } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { - addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); + addVRegUseDeps(&ExitSU, I); } } } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -53,8 +53,6 @@ ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x0, _asdf@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 @@ -67,6 +65,8 @@ ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll @@ -120,10 +120,10 @@ ; ; SDAG-LABEL: extra_use1: ; SDAG: ; %bb.0: ; %bb -; SDAG-NEXT: bfi x1, x0, #1, #63 ; SDAG-NEXT: lsl x8, x0, #1 -; SDAG-NEXT: mov x0, x1 +; SDAG-NEXT: bfi x1, x0, #1, #63 ; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: mov x0, x1 ; SDAG-NEXT: ret bb: %tmp3 = shl i64 %in1, 1 @@ -145,8 +145,8 @@ ; SDAG: ; %bb.0: ; %bb ; SDAG-NEXT: and x8, x1, #0x1 ; SDAG-NEXT: bfi x1, x0, #1, #63 -; SDAG-NEXT: mov x0, x1 ; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: mov x0, x1 ; SDAG-NEXT: ret bb: %tmp3 = shl i64 %in1, 1 diff --git a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll --- a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll +++ b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll @@ -6,13 +6,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: add x8, sp, #40 -; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: stp x30, x18, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x1, x2, [sp, #40] ; CHECK-NEXT: stp x3, x4, [sp, #56] ; CHECK-NEXT: stp x5, x6, [sp, #72] ; CHECK-NEXT: str x7, [sp, #88] ; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: bl other_func ; CHECK-NEXT: ldp x30, x18, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -36,8 +36,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -53,8 +53,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: add x0, sp, #16 ; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add x0, sp, #16 ; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -71,8 +71,8 @@ ; CHECK-NEXT: str x18, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: str x7, [sp, #24] -; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: ldr x18, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -319,8 +319,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mvn w8, w0 ; CHECK-NEXT: adds w8, w8, w1 -; CHECK-NEXT: cset w0, vs ; CHECK-NEXT: add w8, w8, #1 +; CHECK-NEXT: cset w0, vs ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %nota = xor i32 %a, -1 diff --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll --- a/llvm/test/CodeGen/AArch64/align-down.ll +++ b/llvm/test/CodeGen/AArch64/align-down.ll @@ -54,8 +54,8 @@ define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind { ; CHECK-LABEL: t3_extrause0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w9, w1 ; CHECK-NEXT: sub w8, w1, #1 +; CHECK-NEXT: neg w9, w1 ; CHECK-NEXT: and w0, w0, w9 ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll --- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll +++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll @@ -25,8 +25,8 @@ ; CHECK-NEXT: scvtf s4, w0 ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fsub s9, s0, s4 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: sub w19, w0, #1 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __Z3goof ; CHECK-NEXT: fmov s10, s0 ; CHECK-NEXT: fmov s0, s9 @@ -64,8 +64,8 @@ ; CHECK-LINUX-NEXT: scvtf s4, w0 ; CHECK-LINUX-NEXT: fadd s0, s0, s2 ; CHECK-LINUX-NEXT: fsub s9, s0, s4 -; CHECK-LINUX-NEXT: fmov s0, s8 ; CHECK-LINUX-NEXT: sub w19, w0, #1 +; CHECK-LINUX-NEXT: fmov s0, s8 ; CHECK-LINUX-NEXT: bl _Z3goof ; CHECK-LINUX-NEXT: fmov s10, s0 ; CHECK-LINUX-NEXT: fmov s0, s9 diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -65,8 +65,8 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #12 ; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: add x0, sp, #12 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -101,9 +101,9 @@ ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str wzr, [sp, #8] ; CHECK-NEXT: str xzr, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 @@ -122,8 +122,8 @@ ; CHECK-NEXT: stp xzr, x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 @@ -142,9 +142,9 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: stp xzr, xzr, [sp, #8] ; CHECK-NEXT: str wzr, [sp, #24] +; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -163,10 +163,10 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp xzr, xzr, [sp] ; CHECK-NEXT: strh wzr, [sp, #24] ; CHECK-NEXT: str xzr, [sp, #16] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -186,8 +186,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -207,9 +207,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 @@ -229,9 +229,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -251,10 +251,10 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -274,11 +274,11 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -299,7 +299,6 @@ ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] @@ -308,6 +307,7 @@ ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 @@ -326,8 +326,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w8, #-1431655766 -; CHECK-NEXT: add x0, sp, #12 ; CHECK-NEXT: str w8, [sp, #12] +; CHECK-NEXT: add x0, sp, #12 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -364,9 +364,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 @@ -385,9 +385,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp x8, x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 @@ -407,9 +407,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: stp x8, x8, [sp, #8] ; CHECK-NEXT: str w8, [sp, #24] +; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -429,10 +429,10 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp x8, x8, [sp, #8] ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: strh w8, [sp, #24] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -452,8 +452,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -474,9 +474,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 @@ -496,9 +496,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -519,10 +519,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -542,11 +542,11 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -567,7 +567,6 @@ ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] @@ -576,6 +575,7 @@ ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 diff --git a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: csel w8, w9, w8, eq +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: str w8, [x8] ; CHECK-NEXT: ret %1 = icmp eq i32 %x, %y diff --git a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll @@ -12,7 +12,6 @@ ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %true -; CHECK-NEXT: mov w0, #4 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -21,6 +20,7 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov w0, #4 ; CHECK-NEXT: ret %val = and i32 %in, 1 %tst = icmp eq i32 %val, 0 diff --git a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll @@ -15,12 +15,12 @@ ; CHECK-NEXT: str w8, [x8] ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %bb2 -; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w8, [x8] ; CHECK-NEXT: ret %1 = fcmp ueq float %x, %y diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll --- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -38,8 +38,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: cmp w8, #42 ; CHECK-NEXT: sub w9, w9, w0 +; CHECK-NEXT: cmp w8, #42 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: strb w9, [x1] ; CHECK-NEXT: ret @@ -56,8 +56,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: mov w9, #43 -; CHECK-NEXT: cmp w8, #43 ; CHECK-NEXT: sub w9, w9, w0 +; CHECK-NEXT: cmp w8, #43 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: ret @@ -73,8 +73,8 @@ ; CHECK-LABEL: usubo_ult_constant_op1_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: cmp w8, #44 ; CHECK-NEXT: sub w9, w0, #44 +; CHECK-NEXT: cmp w8, #44 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: ret @@ -91,8 +91,8 @@ ; CHECK-NEXT: cmp w8, #45 ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: sub w9, w0, #45 -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: strb w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %ov = icmp ugt i8 45, %x %s = add i8 %x, -45 @@ -105,8 +105,8 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) nounwind { ; CHECK-LABEL: usubo_eq_constant1_op1_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: sub w8, w0, #1 +; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret @@ -158,10 +158,10 @@ ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: cset w21, lo -; CHECK-NEXT: mov x23, x0 -; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: mov x20, x2 ; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: mov x23, x0 +; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: bl call ; CHECK-NEXT: subs x8, x23, x22 ; CHECK-NEXT: b.hs .LBB8_3 diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -178,10 +178,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v2.4s, #1 -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -662,9 +662,9 @@ ; CHECK-NEXT: mov w0, #-1 ; CHECK-NEXT: bl yoo ; CHECK-NEXT: cmp w19, #0 +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: cinc w0, w19, gt ; CHECK-NEXT: mov w1, #2 -; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl xoo ; CHECK-NEXT: fmov d0, #-1.00000000 ; CHECK-NEXT: fadd d0, d8, d0 diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll --- a/llvm/test/CodeGen/AArch64/fadd-combines.ll +++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -115,9 +115,9 @@ ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fmov d2, #-2.00000000 ; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fadd d8, d0, d1 ; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl use ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov d0, d8 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -562,8 +562,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 ; CHECK-NEXT: fmov s0, w8 @@ -592,8 +592,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 ; CHECK-NEXT: fmov s0, w8 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -253,11 +253,11 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w8, #-2147483648 @@ -302,16 +302,16 @@ ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -319,18 +319,18 @@ ; CHECK-NEXT: csel w19, w21, w19, gt ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: bl __unordtf2 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csel w22, wzr, w19, ne -; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -372,16 +372,16 @@ ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -389,18 +389,18 @@ ; CHECK-NEXT: csel w19, w21, w19, gt ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: bl __unordtf2 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csel w22, wzr, w19, ne -; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -415,13 +415,13 @@ ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -463,16 +463,16 @@ ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -480,17 +480,17 @@ ; CHECK-NEXT: csel w19, w21, w19, gt ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: bl __unordtf2 -; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csel w22, wzr, w19, ne +; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -505,13 +505,13 @@ ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -525,13 +525,13 @@ ; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, w20, w0, lt +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -1871,9 +1871,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 @@ -1885,9 +1885,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x21, xzr, x8, vs ; CHECK-NEXT: csel x22, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 @@ -1898,9 +1898,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, xzr, x8, vs ; CHECK-NEXT: csel x24, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, x25, x1, lt @@ -1979,9 +1979,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 @@ -1993,9 +1993,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x21, xzr, x8, vs ; CHECK-NEXT: csel x22, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 @@ -2006,9 +2006,9 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, xzr, x8, vs ; CHECK-NEXT: csel x24, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, x25, x1, lt diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -481,8 +481,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload @@ -505,8 +505,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -253,17 +253,17 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: adrp x8, .LCPI14_1 +; CHECK-NEXT: csel w19, wzr, w0, lt ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: cmp w19, #0 -; CHECK-NEXT: csel w19, wzr, w0, lt ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le @@ -292,28 +292,28 @@ ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w20, w19, wzr, le -; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le @@ -346,27 +346,27 @@ ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w20, w19, wzr, le +; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le @@ -376,13 +376,13 @@ ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -411,32 +411,32 @@ ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w20, w19, wzr, le -; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le @@ -446,12 +446,12 @@ ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 @@ -460,13 +460,13 @@ ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload ; CHECK-NEXT: bl __getf2 -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -1606,14 +1606,14 @@ ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov x25, #68719476735 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, x25, x10, gt ; CHECK-NEXT: csinv x20, x9, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -1622,9 +1622,9 @@ ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x21, x25, x9, gt ; CHECK-NEXT: csinv x22, x8, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -1632,9 +1632,9 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, x25, x9, gt ; CHECK-NEXT: csinv x24, x8, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1696,9 +1696,9 @@ ; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x19, x10, xzr, le ; CHECK-NEXT: csinv x20, x9, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -1707,9 +1707,9 @@ ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x21, x9, xzr, le ; CHECK-NEXT: csinv x22, x8, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -1717,9 +1717,9 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x23, x9, xzr, le ; CHECK-NEXT: csinv x24, x8, xzr, le +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -17,15 +17,15 @@ ; A53-NEXT: movi v0.2d, #0000000000000000 ; A53-NEXT: mov x8, x0 ; A53-NEXT: mov x19, x8 -; A53-NEXT: mov w0, w1 ; A53-NEXT: mov w9, #256 +; A53-NEXT: mov w0, w1 ; A53-NEXT: stp x2, x3, [x8, #32] ; A53-NEXT: mov x2, x8 +; A53-NEXT: str q0, [x8] ; A53-NEXT: str q0, [x19, #16]! +; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str w1, [x19] ; A53-NEXT: mov w1, #4 -; A53-NEXT: str q0, [x8] -; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str wzr, [x8, #20] ; A53-NEXT: bl fcntl ; A53-NEXT: adrp x9, gv0 diff --git a/llvm/test/CodeGen/AArch64/misched-fusion.ll b/llvm/test/CodeGen/AArch64/misched-fusion.ll --- a/llvm/test/CodeGen/AArch64/misched-fusion.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion.ll @@ -1,6 +1,7 @@ ; RUN: llc -o - %s -mtriple=aarch64-unknown -aarch64-enable-cond-br-tune=false -mattr=+arith-bcc-fusion | FileCheck %s --check-prefix=FUSEBCC ; RUN: llc -o - %s -mtriple=aarch64-unknown -aarch64-enable-cond-br-tune=false -mattr=+arith-cbz-fusion | FileCheck %s --check-prefix=FUSECBZ ; RUN: llc -o - %s -mtriple=aarch64-unknown -aarch64-enable-cond-br-tune=false -mcpu=cyclone | FileCheck %s --check-prefix=FUSEBCC --check-prefix=FUSECBZ +; RUN: llc -o - %s -mtriple=aarch64-unknown -aarch64-enable-cond-br-tune=false -mcpu=cortex-a55 -mattr=+arith-bcc-fusion,+arith-cbz-fusion| FileCheck %s --check-prefix=FUSEBCC --check-prefix=FUSECBZ target triple = "aarch64-unknown" diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -356,9 +356,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl _dummy ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 @@ -374,9 +374,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: mov d1, v0[1] -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp, #16] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl _dummy ; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,14 +69,14 @@ ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload @@ -110,14 +110,14 @@ ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: fmov d1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl pow ; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -181,10 +181,10 @@ ; CHECK-NEXT: ldr x8, [sp, #48] ; CHECK-NEXT: ldr q0, [sp, #32] ; CHECK-NEXT: mov w0, #42 -; CHECK-NEXT: mov w1, #17 ; CHECK-NEXT: mov x18, xzr ; CHECK-NEXT: str x8, [sp, #16] ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: mov w1, #17 ; CHECK-NEXT: bl consume_attributes ; CHECK-NEXT: .Ltmp11: ; CHECK-NEXT: add sp, sp, #32 diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -18,11 +18,11 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl callee1 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload @@ -49,7 +49,13 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w3, #3 @@ -57,12 +63,6 @@ ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: add sp, sp, #16 @@ -90,10 +90,10 @@ ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: fmov s1, #2.00000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl callee3 ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll --- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll @@ -24,8 +24,8 @@ ; CHECK-LABEL: test_post_ld1_dup: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-NEXT: add x8, x0, x2, lsl #3 +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret %load = load double, double* %a diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -206,8 +206,8 @@ ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xffff00 -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -212,8 +212,8 @@ ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0x55555555 -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -208,8 +208,8 @@ ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xf0f0f0f -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -201,8 +201,8 @@ ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, #0xffff -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -556,8 +556,8 @@ ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: and w20, w8, w3 -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -52,9 +52,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: str d0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -75,9 +75,9 @@ ; CHECK-LABEL: sin_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -105,9 +105,9 @@ ; CHECK-LABEL: sin_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -267,9 +267,9 @@ ; CHECK-LABEL: cos_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl cosf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -297,9 +297,9 @@ ; CHECK-LABEL: exp_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl expf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -327,9 +327,9 @@ ; CHECK-LABEL: exp2_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -366,9 +366,9 @@ ; CHECK-LABEL: log_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl logf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -396,9 +396,9 @@ ; CHECK-LABEL: log10_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl log10f ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -426,9 +426,9 @@ ; CHECK-LABEL: log2_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl log2f ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -106,9 +106,9 @@ ; CHECK-NEXT: mov w1, v2.s[1] ; CHECK-NEXT: mov w2, v2.s[2] ; CHECK-NEXT: mov w3, v2.s[3] +; CHECK-NEXT: str q0, [x11] ; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fmov w0, s2 -; CHECK-NEXT: str q0, [x11] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.uadd.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -215,23 +215,23 @@ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s ; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: strh w9, [x0, #6] ; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: lsr w10, w10, #16 +; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: mvn v0.16b, v1.16b ; CHECK-NEXT: lsr w8, w11, #16 ; CHECK-NEXT: strh w11, [x0] ; CHECK-NEXT: strb w9, [x0, #8] ; CHECK-NEXT: strb w10, [x0, #5] +; CHECK-NEXT: mvn v0.16b, v1.16b ; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) @@ -254,15 +254,15 @@ ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w8, w9, #1, #1 ; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: and v0.8b, v1.8b, v2.8b ; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: cmeq v0.4h, v0.4h, v1.4h +; CHECK-NEXT: and v0.8b, v1.8b, v2.8b ; CHECK-NEXT: bfi w8, w9, #2, #1 ; CHECK-NEXT: umov w9, v1.h[3] -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: cmeq v0.4h, v0.4h, v1.4h ; CHECK-NEXT: bfi w8, w9, #3, #29 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -289,13 +289,13 @@ ; CHECK-NEXT: cset w14, lo ; CHECK-NEXT: cmp x13, x1 ; CHECK-NEXT: cset w15, lo -; CHECK-NEXT: csel w14, w14, w15, eq ; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: csel w14, w14, w15, eq ; CHECK-NEXT: fmov s0, w14 ; CHECK-NEXT: mov v0.s[1], w11 ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: sshr v0.2s, v0.2s, #31 ; CHECK-NEXT: stp x9, x10, [x8, #16] +; CHECK-NEXT: sshr v0.2s, v0.2s, #31 ; CHECK-NEXT: stp x12, x13, [x8] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -21,8 +21,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 -; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s ; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s ; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: ret %t = call {<1 x i32>, <1 x i1>} @llvm.umul.with.overflow.v1i32(<1 x i32> %a0, <1 x i32> %a1) @@ -38,8 +38,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 -; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s ; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret %t = call {<2 x i32>, <2 x i1>} @llvm.umul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1) @@ -56,8 +56,8 @@ ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp2 v0.4s, v3.4s, v2.4s ; CHECK-NEXT: add x8, x0, #8 +; CHECK-NEXT: uzp2 v0.4s, v3.4s, v2.4s ; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s ; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: str d1, [x0] @@ -117,15 +117,15 @@ ; CHECK-NEXT: umull v4.2d, v3.2s, v2.2s ; CHECK-NEXT: uzp2 v0.4s, v4.4s, v0.4s ; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s -; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s ; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s +; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: str q2, [x11] ; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: str q2, [x11] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -234,12 +234,12 @@ ; CHECK-NEXT: csetm x11, ne ; CHECK-NEXT: cmp xzr, x12 ; CHECK-NEXT: csetm x12, ne -; CHECK-NEXT: fmov d0, x12 ; CHECK-NEXT: mul x8, x10, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x12 ; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret %t = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> %a0, <2 x i64> %a1) @@ -258,24 +258,24 @@ ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w8, v0.s[3] ; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s ; CHECK-NEXT: ushr v2.4s, v0.4s, #24 -; CHECK-NEXT: mov w8, v0.s[3] ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: cmeq v0.4s, v1.4s, #0 -; CHECK-NEXT: cmtst v1.4s, v2.4s, v2.4s ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: cmeq v0.4s, v1.4s, #0 +; CHECK-NEXT: cmtst v1.4s, v2.4s, v2.4s ; CHECK-NEXT: strh w9, [x0, #6] ; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: lsr w10, w10, #16 -; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: strb w8, [x0, #11] ; CHECK-NEXT: lsr w8, w11, #16 ; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: strb w9, [x0, #8] ; CHECK-NEXT: strb w10, [x0, #5] ; CHECK-NEXT: strb w8, [x0, #2] @@ -302,8 +302,8 @@ ; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: bfi w8, w9, #3, #29 ; CHECK-NEXT: and w8, w8, #0xf -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) %val = extractvalue {<4 x i1>, <4 x i1>} %t, 0 @@ -359,8 +359,8 @@ ; CHECK-NEXT: orr w10, w13, w14 ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: mul x16, x0, x4 +; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 ; CHECK-NEXT: stp x16, x11, [x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll @@ -142,10 +142,10 @@ ; CHECK-LABEL: test_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __addtf3 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll --- a/llvm/test/CodeGen/X86/atomic-idempotent.ll +++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll @@ -278,15 +278,15 @@ ; X86-ATOM-NEXT: pushl %ecx ; X86-ATOM-NEXT: calll __sync_fetch_and_or_16 ; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp -; X86-ATOM-NEXT: movl (%esp), %ecx +; X86-ATOM-NEXT: movl (%esp), %eax +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-ATOM-NEXT: movl %eax, 8(%esi) +; X86-ATOM-NEXT: movl %edx, 8(%esi) ; X86-ATOM-NEXT: movl %edi, 12(%esi) -; X86-ATOM-NEXT: movl %ecx, (%esi) +; X86-ATOM-NEXT: movl %eax, (%esi) +; X86-ATOM-NEXT: movl %ecx, 4(%esi) ; X86-ATOM-NEXT: movl %esi, %eax -; X86-ATOM-NEXT: movl %edx, 4(%esi) ; X86-ATOM-NEXT: leal -8(%ebp), %esp ; X86-ATOM-NEXT: popl %esi ; X86-ATOM-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll b/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll --- a/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll +++ b/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll @@ -24,9 +24,9 @@ ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq NullToken@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq PartClass@GOTPCREL(%rip), %r10 +; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movl (%r10), %ebp ; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -99,9 +99,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq qux@PLT +; CHECK-NEXT: movl %eax, %ebx ; CHECK-NEXT: movl $.Ltmp0, %edi ; CHECK-NEXT: movl $.Ltmp1, %esi -; CHECK-NEXT: movl %eax, %ebx ; CHECK-NEXT: callq choose@PLT ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: testb $1, %bl