diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -36,12 +36,16 @@
 
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
@@ -97,6 +101,10 @@
   template <typename T>
   bool visitAND(unsigned Opc, MachineInstr &MI);
   bool visitORR(MachineInstr &MI);
+  bool visitCopy(MachineInstr &MI);
+  // Register-bank predicates on virtual registers (exact class match).
+  bool isGPRegister(Register Reg);
+  bool isFPRRegister(Register Reg);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   StringRef getPassName() const override {
@@ -155,6 +163,27 @@
   return true;
 }
 
+// Returns true if Reg is a virtual register whose class is exactly FPR32,
+// FPR64 or FPR128; physical registers and other classes yield false.
+bool AArch64MIPeepholeOpt::isFPRRegister(Register Reg) {
+  if (!Reg.isVirtual())
+    return false;
+
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  return RC == &AArch64::FPR32RegClass || RC == &AArch64::FPR64RegClass ||
+         RC == &AArch64::FPR128RegClass;
+}
+
+// Returns true if Reg is a virtual register whose class is exactly GPR32 or
+// GPR64; physical registers and other classes (e.g. GPR64sp) yield false.
+bool AArch64MIPeepholeOpt::isGPRegister(Register Reg) {
+  if (!Reg.isVirtual())
+    return false;
+
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  return RC == &AArch64::GPR32RegClass || RC == &AArch64::GPR64RegClass;
+}
+
 template <typename T>
 bool AArch64MIPeepholeOpt::visitAND(
     unsigned Opc, MachineInstr &MI) {
@@ -189,6 +218,69 @@
       });
 }
 
+// MIR is in SSA form at this point, so register reuse is not a concern.
+// visitCopy folds a cross-bank COPY of a loaded value into the load, e.g.
+//   %x:gpr64 = LDRXui %base, imm
+//   %d:fpr64 = COPY %x            ==>   %d:fpr64 = LDRDui %base, imm
+// Handles X<->D and W<->S loads. Returns true if MI was folded and erased.
+bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
+  const MachineOperand &DstOp = MI.getOperand(0);
+  const MachineOperand &SrcOp = MI.getOperand(1);
+  // Only whole-register copies between virtual registers can be retyped.
+  if (!DstOp.isReg() || !SrcOp.isReg() || DstOp.getSubReg() ||
+      SrcOp.getSubReg())
+    return false;
+  const Register DstReg = DstOp.getReg();
+  const Register SrcReg = SrcOp.getReg();
+  if (!DstReg.isVirtual() || !SrcReg.isVirtual() ||
+      !MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  // Give up without a unique def, or if the load is volatile/atomic.
+  MachineInstr *SrcMI = MRI->getUniqueVRegDef(SrcReg);
+  if (!SrcMI || SrcMI->hasOrderedMemoryRef())
+    return false;
+
+  // Map the load to its other-bank twin; DstReg must have exactly the class
+  // that twin defines. LDRHHui/LDRBBui are not folded: they zero-extend.
+  unsigned NewOpc = 0;
+  const TargetRegisterClass *NewRC = nullptr;
+  switch (SrcMI->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDRXui:
+    NewOpc = AArch64::LDRDui;
+    NewRC = &AArch64::FPR64RegClass;
+    break;
+  case AArch64::LDRWui:
+    NewOpc = AArch64::LDRSui;
+    NewRC = &AArch64::FPR32RegClass;
+    break;
+  case AArch64::LDRDui:
+    NewOpc = AArch64::LDRXui;
+    NewRC = &AArch64::GPR64RegClass;
+    break;
+  case AArch64::LDRSui:
+    NewOpc = AArch64::LDRWui;
+    NewRC = &AArch64::GPR32RegClass;
+    break;
+  }
+  if (MRI->getRegClass(DstReg) != NewRC)
+    return false;
+
+  // Emit the new load in place of the old one (not at the COPY) so the
+  // access is not moved past other memory operations or into a loop.
+  BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), TII->get(NewOpc),
+          DstReg)
+      .add(SrcMI->getOperand(1))
+      .add(SrcMI->getOperand(2))
+      .cloneMemRefs(*SrcMI);
+  MRI->markUsesInDebugValueAsUndef(SrcReg);
+  SrcMI->eraseFromParent();
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
   // Check this ORR comes from below zero-extend pattern.
// @@ -534,6 +626,9 @@ {AArch64::ADDXri, AArch64::ADDSXri}, MI); break; + case AArch64::COPY: + Changed = visitCopy(MI); + break; } } } diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -223,16 +223,15 @@ define <32 x i8> @zext_v32i1(<32 x i1> %arg) { ; CHECK-LABEL: zext_v32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: ldr s1, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w9, [sp] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: movi.16b v2, #1 +; CHECK-NEXT: ldr w10, [sp, #8] ; CHECK-NEXT: mov.b v1[1], w8 ; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: movi.16b v2, #1 ; CHECK-NEXT: mov.b v0[2], w2 ; CHECK-NEXT: mov.b v1[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] @@ -290,66 +289,65 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) { ; CHECK-LABEL: sext_v32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr s0, [sp, #64] +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v1[1], w1 ; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: mov.b v0[1], w8 ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: mov.b v1[2], w2 +; CHECK-NEXT: mov.b v0[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 +; CHECK-NEXT: mov.b v1[3], w3 +; CHECK-NEXT: mov.b v0[3], w8 ; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: mov.b v1[4], w4 +; CHECK-NEXT: mov.b v0[4], w8 ; CHECK-NEXT: 
ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: mov.b v1[5], w5 +; CHECK-NEXT: mov.b v0[5], w8 ; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: mov.b v1[6], w6 +; CHECK-NEXT: mov.b v0[6], w8 ; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 +; CHECK-NEXT: mov.b v1[7], w7 +; CHECK-NEXT: mov.b v0[7], w8 ; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: mov.b v1[8], w9 ; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: mov.b v0[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: mov.b v1[9], w10 ; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 +; CHECK-NEXT: mov.b v0[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: mov.b v1[10], w9 ; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: mov.b v0[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: mov.b v1[11], w10 ; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: mov.b v1[11], w8 +; CHECK-NEXT: mov.b v0[11], w8 ; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: mov.b v1[12], w9 ; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 +; CHECK-NEXT: mov.b v0[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 +; CHECK-NEXT: mov.b v1[13], w10 ; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 +; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: mov.b v1[14], w9 +; CHECK-NEXT: mov.b v0[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: mov.b v0[15], w8 ; CHECK-NEXT: shl.16b v1, 
v1, #7 -; CHECK-NEXT: cmlt.16b v0, v0, #0 -; CHECK-NEXT: cmlt.16b v1, v1, #0 +; CHECK-NEXT: shl.16b v2, v0, #7 +; CHECK-NEXT: cmlt.16b v0, v1, #0 +; CHECK-NEXT: cmlt.16b v1, v2, #0 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -358,131 +356,128 @@ define <64 x i8> @zext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: zext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w8 +; CHECK-NEXT: ldr s0, [sp, #320] +; CHECK-NEXT: fmov s4, w0 +; CHECK-NEXT: ldr s2, [sp, #192] +; CHECK-NEXT: ldr s1, [sp, #64] ; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w9, [sp, #328] +; CHECK-NEXT: ldr w10, [sp, #200] ; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w10 +; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w9 +; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: mov.b v2[1], w10 ; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] +; CHECK-NEXT: mov.b v4[1], w1 +; CHECK-NEXT: ldr w11, [sp, #96] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #208] +; CHECK-NEXT: mov.b v0[2], w9 +; CHECK-NEXT: ldr w9, [sp, #344] +; CHECK-NEXT: ldr w12, [sp, #104] +; CHECK-NEXT: mov.b v2[2], w8 +; CHECK-NEXT: ldr w8, [sp, #352] +; 
CHECK-NEXT: mov.b v4[2], w2 +; CHECK-NEXT: ldr w13, [sp, #112] ; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v3[4], w11 +; CHECK-NEXT: ldr w10, [sp, #216] +; CHECK-NEXT: mov.b v0[3], w9 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w14, [sp, #120] +; CHECK-NEXT: mov.b v2[3], w10 +; CHECK-NEXT: ldr w10, [sp, #368] +; CHECK-NEXT: mov.b v4[3], w3 +; CHECK-NEXT: ldr w15, [sp, #128] +; CHECK-NEXT: mov.b v1[4], w11 ; CHECK-NEXT: ldr w11, [sp, #224] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v0[4], w8 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: ldr w16, [sp, #136] ; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v3[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] -; CHECK-NEXT: mov.b 
v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] -; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] -; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] -; CHECK-NEXT: mov.b v0[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 -; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v3[13], w9 +; CHECK-NEXT: ldr w11, [sp, #384] +; CHECK-NEXT: mov.b v4[4], w4 +; CHECK-NEXT: mov.b v1[5], w12 +; CHECK-NEXT: ldr w12, [sp, #232] +; CHECK-NEXT: mov.b v0[5], w9 +; CHECK-NEXT: ldr w9, [sp, #392] +; CHECK-NEXT: movi.16b v5, #1 +; CHECK-NEXT: mov.b v2[5], w12 +; CHECK-NEXT: ldr w12, [sp, #400] +; CHECK-NEXT: mov.b v4[5], w5 +; CHECK-NEXT: mov.b v1[6], w13 +; CHECK-NEXT: ldr w13, [sp, #240] +; CHECK-NEXT: mov.b v0[6], w10 +; CHECK-NEXT: ldr w10, [sp, #408] +; CHECK-NEXT: mov.b v2[6], w13 +; CHECK-NEXT: ldr w13, [sp, #416] +; CHECK-NEXT: mov.b v4[6], w6 +; CHECK-NEXT: mov.b v1[7], w14 +; CHECK-NEXT: ldr w14, [sp, #248] +; CHECK-NEXT: mov.b v0[7], w8 +; CHECK-NEXT: ldr w8, [sp, #424] +; CHECK-NEXT: mov.b v2[7], w14 +; CHECK-NEXT: ldr w14, [sp, #432] +; CHECK-NEXT: mov.b v4[7], w7 +; CHECK-NEXT: mov.b v1[8], w15 +; CHECK-NEXT: ldr w15, [sp, #256] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v2[8], w15 +; CHECK-NEXT: ldr w15, [sp, #440] +; CHECK-NEXT: mov.b v4[8], w11 +; CHECK-NEXT: ldr w11, [sp, #144] +; CHECK-NEXT: mov.b v1[9], w16 +; CHECK-NEXT: ldr w16, [sp, #264] +; 
CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v2[9], w16 +; CHECK-NEXT: ldr w16, [sp, #272] +; CHECK-NEXT: mov.b v4[9], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v0[10], w12 +; CHECK-NEXT: ldr w12, [sp, #280] +; CHECK-NEXT: mov.b v1[10], w11 +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b v2[10], w16 +; CHECK-NEXT: mov.b v4[10], w9 +; CHECK-NEXT: ldr w9, [sp, #24] +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: ldr w10, [sp, #288] +; CHECK-NEXT: mov.b v1[11], w11 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v2[11], w12 +; CHECK-NEXT: mov.b v4[11], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: ldr w11, [sp, #168] +; CHECK-NEXT: mov.b v2[12], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v4[12], w9 ; CHECK-NEXT: ldr w9, [sp, #304] ; CHECK-NEXT: mov.b v0[13], w8 +; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: ldr w11, [sp, #176] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: ldr w10, [sp, #184] +; CHECK-NEXT: mov.b v4[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 -; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[14], w10 +; CHECK-NEXT: mov.b v0[14], w14 +; CHECK-NEXT: mov.b v1[14], w11 ; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: movi.16b v4, #1 -; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 -; CHECK-NEXT: and.16b v0, v0, v4 -; CHECK-NEXT: and.16b v1, v1, v4 -; CHECK-NEXT: and.16b v2, v2, v4 -; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: ldr w9, [sp, #312] +; CHECK-NEXT: mov.b v4[14], w8 +; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: mov.b v0[15], w15 +; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: 
mov.b v2[15], w9 +; CHECK-NEXT: mov.b v4[15], w8 +; CHECK-NEXT: and.16b v3, v0, v5 +; CHECK-NEXT: and.16b v1, v1, v5 +; CHECK-NEXT: and.16b v2, v2, v5 +; CHECK-NEXT: and.16b v0, v4, v5 ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res @@ -491,133 +486,130 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: sext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: ldr s0, [sp, #320] ; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #72] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #200] -; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr s2, [sp, #192] +; CHECK-NEXT: ldr s1, [sp, #64] +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w9, [sp, #328] +; CHECK-NEXT: ldr w10, [sp, #200] +; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w9 ; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: mov.b v2[1], w10 +; CHECK-NEXT: ldr w10, [sp, #88] ; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #88] -; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: ldr w11, [sp, #96] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #208] ; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #208] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: ldr w9, [sp, #344] +; CHECK-NEXT: ldr w12, [sp, #104] +; CHECK-NEXT: mov.b v2[2], w8 +; CHECK-NEXT: ldr w8, [sp, #352] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #96] -; CHECK-NEXT: mov.b v2[2], w9 +; CHECK-NEXT: ldr w13, [sp, #112] +; CHECK-NEXT: mov.b v1[3], w10 +; CHECK-NEXT: ldr w10, [sp, #216] +; CHECK-NEXT: mov.b v0[3], w9 ; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, 
#216] -; CHECK-NEXT: mov.b v1[3], w11 -; CHECK-NEXT: ldr w13, [sp, #104] +; CHECK-NEXT: ldr w14, [sp, #120] +; CHECK-NEXT: mov.b v2[3], w10 +; CHECK-NEXT: ldr w10, [sp, #368] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #368] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w14, [sp, #112] -; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #224] -; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w15, [sp, #128] +; CHECK-NEXT: mov.b v1[4], w11 +; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: mov.b v0[4], w8 ; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: ldr w16, [sp, #136] +; CHECK-NEXT: mov.b v2[4], w11 +; CHECK-NEXT: ldr w11, [sp, #384] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w15, [sp, #120] -; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: mov.b v1[5], w12 +; CHECK-NEXT: ldr w12, [sp, #232] ; CHECK-NEXT: mov.b v0[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v1[5], w13 -; CHECK-NEXT: ldr w16, [sp, #128] +; CHECK-NEXT: ldr w9, [sp, #392] +; CHECK-NEXT: mov.b v2[5], w12 +; CHECK-NEXT: ldr w12, [sp, #400] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w10, [sp, #392] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #400] -; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #240] -; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: mov.b v1[6], w13 +; CHECK-NEXT: ldr w13, [sp, #240] +; CHECK-NEXT: mov.b v0[6], w10 +; CHECK-NEXT: ldr w10, [sp, #408] +; CHECK-NEXT: mov.b v2[6], w13 +; CHECK-NEXT: ldr w13, [sp, #416] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #416] -; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #424] +; CHECK-NEXT: mov.b v1[7], w14 +; CHECK-NEXT: ldr w14, [sp, #248] ; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #248] -; CHECK-NEXT: mov.b v1[7], w15 -; CHECK-NEXT: ldr w15, [sp, #432] +; CHECK-NEXT: ldr w8, [sp, #424] +; CHECK-NEXT: mov.b v2[7], w14 +; CHECK-NEXT: ldr w14, [sp, #432] 
; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w8 -; CHECK-NEXT: ldr w8, [sp] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #256] -; CHECK-NEXT: mov.b v1[8], w16 -; CHECK-NEXT: ldr w16, [sp, #440] -; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #8] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #264] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #272] -; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #144] -; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #16] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] -; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, [sp, #152] -; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #288] -; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #296] -; CHECK-NEXT: mov.b v3[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w13 -; CHECK-NEXT: mov.b v0[12], w14 -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w9 +; CHECK-NEXT: mov.b v1[8], w15 +; CHECK-NEXT: ldr w15, [sp, #256] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v2[8], w15 +; CHECK-NEXT: ldr w15, [sp, #440] +; CHECK-NEXT: mov.b v3[8], w11 +; CHECK-NEXT: ldr w11, [sp, #144] +; CHECK-NEXT: mov.b v1[9], w16 +; CHECK-NEXT: ldr w16, [sp, #264] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v2[9], w16 +; CHECK-NEXT: ldr w16, [sp, #272] +; CHECK-NEXT: mov.b v3[9], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v0[10], w12 +; CHECK-NEXT: ldr w12, [sp, #280] +; CHECK-NEXT: mov.b v1[10], w11 +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b 
v2[10], w16 +; CHECK-NEXT: mov.b v3[10], w9 +; CHECK-NEXT: ldr w9, [sp, #24] +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: ldr w10, [sp, #288] +; CHECK-NEXT: mov.b v1[11], w11 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v2[11], w12 +; CHECK-NEXT: mov.b v3[11], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: ldr w11, [sp, #168] +; CHECK-NEXT: mov.b v2[12], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v3[12], w9 ; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: mov.b v0[13], w8 +; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: ldr w11, [sp, #176] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v3[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[13], w12 -; CHECK-NEXT: mov.b v0[14], w15 -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #184] +; CHECK-NEXT: mov.b v0[14], w14 +; CHECK-NEXT: mov.b v1[14], w11 +; CHECK-NEXT: mov.b v2[14], w9 +; CHECK-NEXT: ldr w9, [sp, #312] ; CHECK-NEXT: mov.b v3[14], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: mov.b v0[15], w16 +; CHECK-NEXT: mov.b v0[15], w15 ; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: mov.b v2[15], w9 ; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w11 ; CHECK-NEXT: shl.16b v4, v0, #7 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: shl.16b v3, v3, #7 ; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: cmlt.16b v0, v3, #0 +; CHECK-NEXT: shl.16b v0, v3, #7 ; CHECK-NEXT: cmlt.16b v1, v1, #0 ; CHECK-NEXT: cmlt.16b v2, v2, #0 +; CHECK-NEXT: cmlt.16b v0, v0, #0 ; CHECK-NEXT: cmlt.16b v3, v4, #0 ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll --- 
a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -217,10 +217,8 @@ define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind { ; CHECK-LABEL: sqdmulh_1s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: sqdmulh s0, s0, s1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -291,10 +289,8 @@ define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind { ; CHECK-LABEL: sqrdmulh_1s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: sqrdmulh s0, s0, s1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll --- a/llvm/test/CodeGen/AArch64/dp1.ll +++ b/llvm/test/CodeGen/AArch64/dp1.ll @@ -241,8 +241,7 @@ ; CHECK-GISEL: // %bb.0: ; CHECK-GISEL-NEXT: adrp x8, :got:var64 ; CHECK-GISEL-NEXT: ldr x8, [x8, :got_lo12:var64] -; CHECK-GISEL-NEXT: ldr x9, [x8] -; CHECK-GISEL-NEXT: fmov d0, x9 +; CHECK-GISEL-NEXT: ldr d0, [x8] ; CHECK-GISEL-NEXT: cnt v0.8b, v0.8b ; CHECK-GISEL-NEXT: uaddlv h0, v0.8b ; CHECK-GISEL-NEXT: fmov w9, s0 diff --git a/llvm/test/CodeGen/AArch64/neon-dotpattern.ll b/llvm/test/CodeGen/AArch64/neon-dotpattern.ll --- a/llvm/test/CodeGen/AArch64/neon-dotpattern.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotpattern.ll @@ -4,11 +4,9 @@ define fastcc void @test_sdot_v4i8(i8* noalias nocapture %0, i8* noalias nocapture readonly %1, i8* noalias nocapture readonly %2) { ; CHECK-LABEL: test_sdot_v4i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr w8, [x2] ; CHECK-NEXT: dup v0.2s, wzr -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: ldr s1, [x2] +; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b ; CHECK-NEXT: fmov x8, d0 ; 
CHECK-NEXT: str w8, [x0] @@ -51,11 +49,9 @@ define fastcc void @test_udot_v4i8(i8* noalias nocapture %0, i8* noalias nocapture readonly %1, i8* noalias nocapture readonly %2) { ; CHECK-LABEL: test_udot_v4i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr w8, [x2] ; CHECK-NEXT: dup v0.2s, wzr -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: ldr s1, [x2] +; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: str w8, [x0] diff --git a/llvm/test/CodeGen/AArch64/neon-extadd.ll b/llvm/test/CodeGen/AArch64/neon-extadd.ll --- a/llvm/test/CodeGen/AArch64/neon-extadd.ll +++ b/llvm/test/CodeGen/AArch64/neon-extadd.ll @@ -532,69 +532,63 @@ define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) { ; CHECK-LABEL: i12: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr w12, [sp, #32] -; CHECK-NEXT: fmov s5, w0 -; CHECK-NEXT: ldr w15, [sp] +; CHECK-NEXT: ldr w10, [sp, #40] ; CHECK-NEXT: fmov s4, w4 -; CHECK-NEXT: ldr w14, [sp, #40] -; CHECK-NEXT: fmov s0, w12 -; CHECK-NEXT: ldr w16, [sp, #48] -; CHECK-NEXT: fmov s1, w15 -; CHECK-NEXT: ldr w15, [sp, #8] -; CHECK-NEXT: ldr w18, [sp, #16] -; CHECK-NEXT: mov v0.h[1], w14 -; CHECK-NEXT: ldr w17, [sp, #56] -; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: ldr w0, [sp, #24] -; CHECK-NEXT: mov v5.h[1], w1 -; CHECK-NEXT: ldr w13, [sp, #64] -; CHECK-NEXT: ldr w1, [sp, #128] -; CHECK-NEXT: mov v0.h[2], w16 -; CHECK-NEXT: ldr w16, [sp, #96] -; CHECK-NEXT: mov v1.h[2], w18 -; CHECK-NEXT: ldr w10, [sp, #72] -; CHECK-NEXT: mov v5.h[2], w2 -; CHECK-NEXT: ldr w2, [sp, #160] -; CHECK-NEXT: mov v4.h[1], w5 -; CHECK-NEXT: ldr w5, [sp, #168] -; CHECK-NEXT: mov v0.h[3], w17 -; CHECK-NEXT: ldr w14, [sp, #104] -; CHECK-NEXT: mov v1.h[3], w0 -; CHECK-NEXT: ldr w18, [sp, #136] -; CHECK-NEXT: fmov s6, w1 +; CHECK-NEXT: ldr s0, [sp, #32] +; CHECK-NEXT: fmov s5, w0 +; CHECK-NEXT: ldr w13, [sp, #8] +; CHECK-NEXT: ldr s1, [sp] +; CHECK-NEXT: mov v0.h[1], w10 +; 
CHECK-NEXT: ldr w12, [sp, #48] +; CHECK-NEXT: ldr w16, [sp, #16] +; CHECK-NEXT: mov v1.h[1], w13 +; CHECK-NEXT: ldr w14, [sp, #56] +; CHECK-NEXT: ldr w18, [sp, #24] +; CHECK-NEXT: mov v0.h[2], w12 +; CHECK-NEXT: ldr w11, [sp, #72] +; CHECK-NEXT: ldr w13, [sp, #104] +; CHECK-NEXT: mov v1.h[2], w16 +; CHECK-NEXT: ldr w17, [sp, #136] ; CHECK-NEXT: ldr w0, [sp, #176] -; CHECK-NEXT: fmov s7, w16 -; CHECK-NEXT: fmov s16, w13 -; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: mov v0.h[3], w14 ; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: movi v0.4s, #15, msl #8 +; CHECK-NEXT: mov v4.h[1], w5 ; CHECK-NEXT: ldr w12, [sp, #112] -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ldr w17, [sp, #144] -; CHECK-NEXT: mov v6.h[1], w18 -; CHECK-NEXT: ldr w4, [sp, #184] -; CHECK-NEXT: mov v7.h[1], w14 +; CHECK-NEXT: mov v1.h[3], w18 +; CHECK-NEXT: ldr w18, [sp, #168] +; CHECK-NEXT: mov v5.h[1], w1 +; CHECK-NEXT: ldr w15, [sp, #144] +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ldr w16, [sp, #184] +; CHECK-NEXT: movi v0.4s, #15, msl #8 ; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ldr w10, [sp, #120] +; CHECK-NEXT: mov v4.h[2], w6 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov v5.h[2], w2 ; CHECK-NEXT: and v3.16b, v2.16b, v0.16b -; CHECK-NEXT: ldr w11, [sp, #120] ; CHECK-NEXT: and v2.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr w15, [sp, #152] -; CHECK-NEXT: fmov s1, w2 -; CHECK-NEXT: mov v16.h[1], w10 -; CHECK-NEXT: mov v4.h[2], w6 -; CHECK-NEXT: mov v1.h[1], w5 -; CHECK-NEXT: mov v6.h[2], w17 +; CHECK-NEXT: ldr s1, [sp, #160] +; CHECK-NEXT: ldr s6, [sp, #128] +; CHECK-NEXT: ldr s7, [sp, #96] +; CHECK-NEXT: ldr s16, [sp, #64] +; CHECK-NEXT: mov v1.h[1], w18 +; CHECK-NEXT: mov v6.h[1], w17 +; CHECK-NEXT: mov v7.h[1], w13 +; CHECK-NEXT: mov v16.h[1], w11 +; CHECK-NEXT: mov v1.h[2], w0 +; CHECK-NEXT: mov v6.h[2], w15 ; CHECK-NEXT: mov v7.h[2], w12 ; CHECK-NEXT: mov v16.h[2], w9 -; CHECK-NEXT: mov v1.h[2], w0 +; CHECK-NEXT: mov 
v1.h[3], w16 ; CHECK-NEXT: mov v4.h[3], w7 ; CHECK-NEXT: mov v5.h[3], w3 -; CHECK-NEXT: mov v6.h[3], w15 -; CHECK-NEXT: mov v1.h[3], w4 -; CHECK-NEXT: mov v7.h[3], w11 +; CHECK-NEXT: mov v6.h[3], w14 +; CHECK-NEXT: mov v7.h[3], w10 ; CHECK-NEXT: mov v16.h[3], w8 -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v4.4s, v4.4h, #0 ; CHECK-NEXT: ushll v5.4s, v5.4h, #0 ; CHECK-NEXT: ushll v6.4s, v6.4h, #0 ; CHECK-NEXT: and v17.16b, v1.16b, v0.16b