diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -47,6 +47,31 @@
   MULSUBX_OP2,
   MULADDXI_OP1,
   MULSUBXI_OP1,
+  // 24-bit imm add/sub patterns matched by the AArch64 machine combiner.
+  ADDW_MOVi32imm_OP1,
+  ADDW_MOVi32imm_OP2,
+  ADDW_negMOVi32imm_OP1,
+  ADDW_negMOVi32imm_OP2,
+  ADDX_StR_MOVi32imm_OP1,
+  ADDX_StR_MOVi32imm_OP2,
+  ADDX_StR_negMOVi32imm_OP1,
+  ADDX_StR_negMOVi32imm_OP2,
+  ADDX_MOVi64imm_OP1,
+  ADDX_MOVi64imm_OP2,
+  ADDX_negMOVi64imm_OP1,
+  ADDX_negMOVi64imm_OP2,
+  SUBW_MOVi32imm_OP1,
+  SUBW_MOVi32imm_OP2,
+  SUBW_negMOVi32imm_OP1,
+  SUBW_negMOVi32imm_OP2,
+  SUBX_StR_MOVi32imm_OP1,
+  SUBX_StR_MOVi32imm_OP2,
+  SUBX_StR_negMOVi32imm_OP1,
+  SUBX_StR_negMOVi32imm_OP2,
+  SUBX_MOVi64imm_OP1,
+  SUBX_MOVi64imm_OP2,
+  SUBX_negMOVi64imm_OP1,
+  SUBX_negMOVi64imm_OP2,
   // NEON integers vectors
   MULADDv8i8_OP1,
   MULADDv8i8_OP2,
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -265,6 +265,7 @@
 enum class CombinerObjective {
   MustReduceDepth,            // The data dependency chain must be improved.
   MustReduceRegisterPressure, // The register pressure must be reduced.
+  MustNotExistInLoop,         // The pattern must not be applied inside a loop.
   Default                     // The critical path must not be lengthened.
 };
 
@@ -282,6 +283,31 @@
   case MachineCombinerPattern::REASSOC_XY_BCA:
   case MachineCombinerPattern::REASSOC_XY_BAC:
     return CombinerObjective::MustReduceRegisterPressure;
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP1:
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP2:
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP1:
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP1:
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP1:
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP1:
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP2:
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP1:
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP2:
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP1:
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP2:
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP1:
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP1:
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP1:
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP1:
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP2:
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP1:
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP2:
+    return CombinerObjective::MustNotExistInLoop;
   default:
     return CombinerObjective::Default;
   }
@@ -597,6 +623,12 @@
       verifyPatternOrder(MBB, MI, Patterns);
 
     for (auto P : Patterns) {
+      // Skip this pattern when inside a loop, since applying it there could
+      // block hoisting of the loop-invariant immediate materialization.
+      if (getCombinerObjective(P) == CombinerObjective::MustNotExistInLoop &&
+          ML && ML->contains(&MI))
+        continue;
+
       SmallVector<MachineInstr *, 16> InsInstrs;
       SmallVector<MachineInstr *, 16> DelInstrs;
       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
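
A note on the MustNotExistInLoop objective above: the transformation trades a
single, loop-invariant constant materialization for two data-dependent adds,
so applying it inside a loop can pessimize code. A source-level illustration
of the hazard (a hypothetical C++ function, not part of the patch):

    // Outside a loop, splitting a 24-bit immediate add into two 12-bit adds
    // saves an instruction over mov+movk+add. Inside this loop, however, the
    // mov+movk pair is loop-invariant and would be hoisted by LICM, while the
    // two dependent adds would execute on every iteration -- hence the gate.
    long sum(const long *p, long n) {
      long s = 0;
      for (long i = 0; i < n; ++i)
        s += p[i] + 11183445; // 24-bit immediate reused each iteration
      return s;
    }
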
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4771,6 +4771,106 @@
   }
   return Found;
 }
+
+/// getAddSub24Patterns - Find ADD/SUB instructions that have a 24-bit
+/// immediate materialized into one of their operands, and turn each of them
+/// into two ADD/SUB instructions with encoded 12-bit immediates.
+/// \param Root the current instruction to check if it is an ADD/SUB that can
+/// be combined
+/// \param [out] Patterns the list of patterns for the pattern evaluator
+/// \return true iff there is an ADD/SUB that can be combined
+static bool
+getAddSub24Patterns(MachineInstr &Root,
+                    SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  unsigned Opc = Root.getOpcode();
+  MachineBasicBlock &MBB = *Root.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  bool Found = false;
+
+  using MCP = MachineCombinerPattern;
+
+  auto MatchImm = [&](unsigned Imm, MCP Pat, MCP NPat) {
+    // Require a bit in 23:16 (not just 23:12), so that an immediate which
+    // fits in 16 bits keeps its single MOV plus one ADD rather than becoming
+    // two ADDs.
+    if (!(Imm & ~0x00ffffff) && (Imm & 0x00ff0000) && (Imm & 0x00000fff)) {
+      Patterns.push_back(Pat);
+      return true;
+    }
+    if (!(-Imm & ~0x00ffffff) && (-Imm & 0x00ff0000) && (-Imm & 0x00000fff)) {
+      Patterns.push_back(NPat);
+      return true;
+    }
+    return false;
+  };
+
+  // Match (ADD/SUBW WN (MOVi32imm <24-bit>)) ->
+  //       (ADD/SUBW (ADD/SUBW WN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchW = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (SUBREG_TO_REG (MOVi32imm <24-bit>))) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXStR = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::SUBREG_TO_REG))
+      return false;
+    MachineInstr &SubToReg = *MRI.getUniqueVRegDef(AddSubOprd.getReg());
+    MachineOperand &SubToRegOprd = SubToReg.getOperand(2);
+    if (!canCombine(MBB, SubToRegOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(SubToRegOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (MOVi64imm <24-bit>)) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXM64 = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi64imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  switch (Opc) {
+  default:
+    break;
+  case AArch64::ADDWrr:
+    Found |= MatchW(1, MCP::ADDW_MOVi32imm_OP1, MCP::ADDW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::ADDW_MOVi32imm_OP2, MCP::ADDW_negMOVi32imm_OP2);
+    break;
+  case AArch64::ADDXrr:
+    Found |= MatchXM64(1, MCP::ADDX_MOVi64imm_OP1, MCP::ADDX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::ADDX_MOVi64imm_OP2, MCP::ADDX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::ADDX_StR_MOVi32imm_OP1,
+                       MCP::ADDX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::ADDX_StR_MOVi32imm_OP2,
+                       MCP::ADDX_StR_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBWrr:
+    Found |= MatchW(1, MCP::SUBW_MOVi32imm_OP1, MCP::SUBW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::SUBW_MOVi32imm_OP2, MCP::SUBW_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBXrr:
+    Found |= MatchXM64(1, MCP::SUBX_MOVi64imm_OP1, MCP::SUBX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::SUBX_MOVi64imm_OP2, MCP::SUBX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::SUBX_StR_MOVi32imm_OP1,
+                       MCP::SUBX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::SUBX_StR_MOVi32imm_OP2,
+                       MCP::SUBX_StR_negMOVi32imm_OP2);
+    break;
+  }
+  return Found;
+}
+
 /// Floating-Point Support
 
 /// Find instructions that can be turned into madd.
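
As a standalone sanity check of the bit tests in MatchImm and of the hi/lo
decomposition performed by the generators further down, the following sketch
mirrors the logic (plain C++; the helper name `qualifies` is an assumption for
illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Mirrors MatchImm: the immediate must fit in 24 bits, set a bit in 23:16
    // (a value that fits in 16 bits keeps its single MOV), and set a bit in
    // 11:0 (presumably because one shifted ADD/SUB already covers that case).
    static bool qualifies(uint32_t Imm) {
      return !(Imm & ~0x00ffffffu) && (Imm & 0x00ff0000u) &&
             (Imm & 0x00000fffu);
    }

    int main() {
      uint32_t Imm = 11183445;           // 0xAAA555, from the addsub.ll tests
      assert(qualifies(Imm));
      uint32_t Hi = (Imm >> 12) & 0xfff; // 2730, emitted with lsl #12
      uint32_t Lo = Imm & 0xfff;         // 1365, emitted with lsl #0
      assert((Hi << 12) + Lo == Imm);    // the two ADDri together rebuild Imm
      assert(!qualifies(4369));          // fits in 16 bits: rejected
      assert(!qualifies(17895697));      // needs 25 bits: rejected
    }
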
@@ -5094,6 +5194,8 @@
   // Integer patterns
   if (getMaddPatterns(Root, Patterns))
     return true;
+  if (getAddSub24Patterns(Root, Patterns))
+    return true;
   // Floating point patterns
   if (getFMULPatterns(Root, Patterns))
     return true;
@@ -5347,6 +5449,146 @@
   return MUL;
 }
 
+/// genAddSub24BitImm - Creates two (ADD|SUB)(W|X)ri instructions that take
+/// the high and low bits, respectively, of a 24-bit immediate. Constrains
+/// the register class as needed. Adds the new instructions to the insert
+/// list and returns the move-immediate instruction pointer so that the
+/// caller can add it to the delete list.
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param ImmInst is the MOVi(32|64)imm instruction
+/// \param IdxRootOpd is the index of the Root operand that holds the
+/// immediate
+/// \param Imm is the immediate value, which uses at least 13 and at most 24
+/// bits
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class of the operands of the (ADD|SUB)(W|X)ri
+/// instructions
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be
+/// removed
+static MachineInstr *
+genAddSub24BitImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                  const TargetInstrInfo *TII, MachineInstr &Root,
+                  MachineInstr &ImmInst, unsigned IdxRootOpd, unsigned Imm,
+                  unsigned NewOpc, const TargetRegisterClass *RC,
+                  SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  unsigned ImmHi = (Imm >> 12) & 0x0fff, ImmLo = Imm & 0x0fff;
+  unsigned IdxOtherOpd = IdxRootOpd == 1 ? 2 : 1;
+  Register ResultReg = Root.getOperand(0).getReg();
+  Register ImmReg = Root.getOperand(IdxRootOpd).getReg();
+  bool ImmIsKill = Root.getOperand(IdxRootOpd).isKill();
+  Register SrcReg = Root.getOperand(IdxOtherOpd).getReg();
+  bool SrcIsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+  if (Register::isVirtualRegister(ResultReg))
+    MRI.constrainRegClass(ResultReg, RC);
+  if (Register::isVirtualRegister(ImmReg))
+    MRI.constrainRegClass(ImmReg, RC);
+  if (Register::isVirtualRegister(SrcReg))
+    MRI.constrainRegClass(SrcReg, RC);
+
+  MachineInstrBuilder MIB1 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ImmReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill))
+          .addImm(ImmHi)
+          .addImm(12);
+  MachineInstrBuilder MIB2 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ResultReg)
+          .addReg(ImmReg, getKillRegState(ImmIsKill))
+          .addImm(ImmLo)
+          .addImm(0);
+  InsInstrs.push_back(MIB1);
+  InsInstrs.push_back(MIB2);
+  return &ImmInst;
+}
+
+/// genAddSubMovImm - Generate two ADD/SUB immediate instructions from an
+/// ADD/SUB instruction that has a 24-bit value moved into one of its
+/// operands. This shortens the final assembly when the 24-bit immediate
+/// would otherwise require two MOV-immediate instructions.
+/// This function extracts the move-immediate instruction, then delegates
+/// the work to genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi(32|64)imm N:<24-bit imm>
+/// V = (ADD|SUB)(W|X)rr Rn I
+/// ==> Tmp = (ADD|SUB)(W|X)ri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)(W|X)ri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param IdxRootOpd is the index of the Root operand defined by the
+/// MOVi(32|64)imm instruction
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class of the operands of the (ADD|SUB)(W|X)ri
+/// instructions
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be
+/// removed
+static MachineInstr *
+genAddSubMovImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                const TargetInstrInfo *TII, MachineInstr &Root,
+                unsigned IdxRootOpd, unsigned NewOpc,
+                const TargetRegisterClass *RC, bool Negate,
+                SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  MachineInstr &ImmInst =
+      *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm,
+                           NewOpc, RC, InsInstrs);
+}
+
+/// genAddSubStR - Generate two ADD/SUB immediate instructions from an
+/// ADD/SUB instruction that has a 24-bit value moved into one of its
+/// operands through an intermediate SUBREG_TO_REG step. This shortens the
+/// final assembly when the 24-bit immediate would otherwise require two
+/// MOV-immediate instructions.
+/// This function extracts the SUBREG_TO_REG and move-immediate instructions,
+/// marks the SUBREG_TO_REG for deletion, then delegates the work to
+/// genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi32imm N:<24-bit imm>
+/// S = SUBREG_TO_REG I
+/// V = (ADD|SUB)Xrr Rn S
+/// ==> Tmp = (ADD|SUB)Xri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)Xri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target information
+/// \param Root is the (ADD|SUB)Xrr instruction
+/// \param IdxRootOpd is the index of the Root operand that has the
+/// SUBREG_TO_REG result
+/// \param NewOpc The opcode for the two (ADD|SUB)Xri instructions
+/// \param RC Register class of the operands of the (ADD|SUB)Xri instructions
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)Xri instructions
+/// \param [out] DelInstrs is a vector that will contain the SUBREG_TO_REG
+/// instruction that could be removed
+/// \return the address of the MOVi32imm instruction that could be removed
+static MachineInstr *genAddSubStR(MachineFunction &MF,
+                                  MachineRegisterInfo &MRI,
+                                  const TargetInstrInfo *TII,
+                                  MachineInstr &Root, unsigned IdxRootOpd,
+                                  unsigned NewOpc,
+                                  const TargetRegisterClass *RC, bool Negate,
+                                  SmallVectorImpl<MachineInstr *> &InsInstrs,
+                                  SmallVectorImpl<MachineInstr *> &DelInstrs) {
+  MachineInstr &SubToReg =
+      *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  MachineInstr &ImmInst = *MRI.getVRegDef(SubToReg.getOperand(2).getReg());
+  DelInstrs.push_back(&SubToReg);
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm,
+                           NewOpc, RC, InsInstrs);
+}
+
 /// When getMachineCombinerPatterns() finds potential patterns,
 /// this function generates the instructions that could replace the
 /// original code sequence
@@ -5535,6 +5777,103 @@
     break;
   }
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP1:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                       &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP2:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                       &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP1:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                       &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP2:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                       &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP1:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                       &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP2:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                       &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP1:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                       &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs);
+    break;
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP2:
+    MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                       &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs);
+    break;
+
   case MachineCombinerPattern::MULADDv8i8_OP1:
     Opc = AArch64::MLAv8i8;
     RC = &AArch64::FPR64RegClass;
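
The *_neg* cases above pass Negate=true into genAddSubMovImm/genAddSubStR,
which negate the immediate in `unsigned` arithmetic before splitting it, while
the caller pairs that with the opposite ri opcode (ADD becomes SUB and vice
versa). A small sketch of why the unsigned wraparound is sound, assuming the
32-bit case (an illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // The bit pattern that MOVi32imm -1121757 materializes:
      uint32_t Imm = static_cast<uint32_t>(-1121757);
      // Negating modulo 2^32 recovers a 24-bit value...
      uint32_t Neg = 0u - Imm;
      assert(Neg == 1121757u && !(Neg & ~0x00ffffffu));
      // ...and adding Imm equals subtracting Neg, modulo 2^32:
      uint32_t X = 123456789u;
      assert(X + Imm == X - Neg);
    }
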
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir
@@ -0,0 +1,351 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+---
+name: reject_16bit
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: reject_16bit
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4369
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 4369
+    %2:gpr32 = ADDWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: reject_16bit_neg
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: reject_16bit_neg
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -4369
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm -4369
+    %2:gpr32 = ADDWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: reject_16bit_X
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: reject_16bit_X
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4369
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], killed [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY [[ADDXrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 4369
+    %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+    %3:gpr64 = ADDXrr %0, killed %2
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+...
+---
+name: reject_25bit
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: reject_25bit
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 17895697
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 17895697
+    %2:gpr32 = ADDWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: addi
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: addi
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr32 = ADDWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: addi_flip
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: addi_flip
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr32 = ADDWrr killed %1, %0
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: addi_negate
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: addi_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[SUBWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm -1121757
+    %2:gpr32 = ADDWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: addi_flip_negate
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: addi_flip_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[SUBWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm -1121757
+    %2:gpr32 = ADDWrr killed %1, %0
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: addl
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+    %3:gpr64 = ADDXrr %0, killed %2
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+...
+---
+name: addl_flip
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl_flip
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+    %3:gpr64 = ADDXrr killed %2, %0
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+...
+---
+name: addl_negate
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[SUBXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = MOVi64imm -1121757
+    %2:gpr64 = ADDXrr %0, killed %1
+    $x0 = COPY %2
+    RET_ReallyLR implicit $x0
+...
+---
+name: addl_flip_negate
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl_flip_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[SUBXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = MOVi64imm -1121757
+    %2:gpr64 = ADDXrr killed %1, %0
+    $x0 = COPY %2
+    RET_ReallyLR implicit $x0
+...
+
+
+---
+name: subi
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: subi
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[SUBWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr32 = SUBWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: subi_flip
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: subi_flip
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[SUBWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr32 = SUBWrr killed %1, %0
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: subi_negate
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: subi_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm -1121757
+    %2:gpr32 = SUBWrr %0, killed %1
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: subi_flip_negate
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: subi_flip_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm -1121757
+    %2:gpr32 = SUBWrr killed %1, %0
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: subl
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: subl
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[SUBXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+    %3:gpr64 = SUBXrr %0, killed %2
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+...
+---
+name: subl_flip
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: subl_flip
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[SUBXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+    %3:gpr64 = SUBXrr killed %2, %0
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+...
+---
+name: subl_negate
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: subl_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = MOVi64imm -1121757
+    %2:gpr64 = SUBXrr %0, killed %1
+    $x0 = COPY %2
+    RET_ReallyLR implicit $x0
+...
+---
+name: subl_flip_negate
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: subl_flip_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0
+    ; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = MOVi64imm -1121757
+    %2:gpr64 = SUBXrr killed %1, %0
+    $x0 = COPY %2
+    RET_ReallyLR implicit $x0
+...
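
The addsub.ll updates below correspond to source code along the following
lines (a hypothetical C++ rendering of the IR; the authoritative before/after
assembly is what the updated CHECK lines assert):

    // For a 24-bit immediate such as 11183445 (0xAAA555), codegen previously
    // emitted
    //   mov  w8, #42325
    //   movk w8, #170, lsl #16
    //   add  x0, x0, x8
    // and with this patch is expected to emit
    //   add  x8, x0, #2730, lsl #12
    //   add  x0, x8, #1365
    long add_two_parts_imm(long a) { return a + 11183445; }
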
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -152,9 +152,8 @@
 define i64 @add_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, 11183445
   ret i64 %b
@@ -163,9 +162,8 @@
 define i32 @add_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, 11183445
   ret i32 %b
@@ -174,9 +172,8 @@
 define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, -11183445
   ret i64 %b
@@ -185,9 +182,8 @@
 define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, -11183445
   ret i32 %b
@@ -196,9 +192,8 @@
 define i64 @sub_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, 11183445
   ret i64 %b
@@ -207,9 +202,8 @@
 define i32 @sub_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, 11183445
   ret i32 %b
@@ -218,9 +212,8 @@
 define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, -11183445
   ret i64 %b
@@ -229,9 +222,8 @@
 define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, -11183445
   ret i32 %b
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -214,10 +214,9 @@
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w10, #14464
-; CHECK-NEXT:    movk w10, #1, lsl #16
 ; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    add x9, x9, #19, lsl #12 // =77824
+; CHECK-NEXT:    add x9, x9, #2176
 ; CHECK-NEXT:    cmp w8, w1
 ; CHECK-NEXT:    b.ge .LBB4_2
 ; CHECK-NEXT:  .LBB4_1: // %while_body