diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -47,6 +47,31 @@
   MULSUBX_OP2,
   MULADDXI_OP1,
   MULSUBXI_OP1,
+  // 24-bit imm add/sub patterns matched by the AArch64 machine combiner.
+  ADDW_MOVi32imm_OP1,
+  ADDW_MOVi32imm_OP2,
+  ADDW_negMOVi32imm_OP1,
+  ADDW_negMOVi32imm_OP2,
+  ADDX_StR_MOVi32imm_OP1,
+  ADDX_StR_MOVi32imm_OP2,
+  ADDX_StR_negMOVi32imm_OP1,
+  ADDX_StR_negMOVi32imm_OP2,
+  ADDX_MOVi64imm_OP1,
+  ADDX_MOVi64imm_OP2,
+  ADDX_negMOVi64imm_OP1,
+  ADDX_negMOVi64imm_OP2,
+  SUBW_MOVi32imm_OP1,
+  SUBW_MOVi32imm_OP2,
+  SUBW_negMOVi32imm_OP1,
+  SUBW_negMOVi32imm_OP2,
+  SUBX_StR_MOVi32imm_OP1,
+  SUBX_StR_MOVi32imm_OP2,
+  SUBX_StR_negMOVi32imm_OP1,
+  SUBX_StR_negMOVi32imm_OP2,
+  SUBX_MOVi64imm_OP1,
+  SUBX_MOVi64imm_OP2,
+  SUBX_negMOVi64imm_OP1,
+  SUBX_negMOVi64imm_OP2,
   // NEON integers vectors
   MULADDv8i8_OP1,
   MULADDv8i8_OP2,
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -265,6 +265,7 @@
 enum class CombinerObjective {
   MustReduceDepth,            // The data dependency chain must be improved.
   MustReduceRegisterPressure, // The register pressure must be reduced.
+  MustNotExistInLoop,         // The pattern must not be applied inside a loop.
   Default                     // The critical path must not be lengthened.
 };

@@ -282,6 +283,31 @@
   case MachineCombinerPattern::REASSOC_XY_BCA:
   case MachineCombinerPattern::REASSOC_XY_BAC:
     return CombinerObjective::MustReduceRegisterPressure;
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP1:
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP2:
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP1:
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP1:
+  case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP1:
+  case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP2:
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP1:
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP2:
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP1:
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP2:
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP1:
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP2:
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP1:
+  case MachineCombinerPattern::SUBW_negMOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP1:
+  case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP1:
+  case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP2:
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP1:
+  case MachineCombinerPattern::SUBX_MOVi64imm_OP2:
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP1:
+  case MachineCombinerPattern::SUBX_negMOVi64imm_OP2:
+    return CombinerObjective::MustNotExistInLoop;
   default:
     return CombinerObjective::Default;
   }
@@ -597,6 +623,12 @@
     verifyPatternOrder(MBB, MI, Patterns);

     for (auto P : Patterns) {
+      // Skip this pattern when inside a loop, since applying it could hinder
+      // hoisting of loop invariants.
+      if (getCombinerObjective(P) == CombinerObjective::MustNotExistInLoop &&
+          ML && ML->contains(&MI))
+        continue;
+
       SmallVector<MachineInstr *, 16> InsInstrs;
       SmallVector<MachineInstr *, 16> DelInstrs;
       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
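The new objective lets the generic combiner veto these patterns inside loops: a single MOV of the constant is loop-invariant and can be hoisted out of the loop (for example by MachineLICM), leaving one ADD per iteration, while the two chained ADD-immediates produced by the split would both stay in the loop body. A minimal standalone sketch of the added gate follows; it is not part of the patch, and shouldSkipPattern is an illustrative helper rather than an LLVM API.

#include <cassert>

// Mirrors the check added to the per-pattern loop in MachineCombiner.cpp:
// patterns whose objective is MustNotExistInLoop are ignored whenever the
// root instruction sits inside a loop.
enum class CombinerObjective {
  MustReduceDepth,
  MustReduceRegisterPressure,
  MustNotExistInLoop,
  Default
};

static bool shouldSkipPattern(CombinerObjective Objective, bool RootIsInLoop) {
  return Objective == CombinerObjective::MustNotExistInLoop && RootIsInLoop;
}

int main() {
  // The 24-bit split is considered outside loops only.
  assert(!shouldSkipPattern(CombinerObjective::MustNotExistInLoop, false));
  assert(shouldSkipPattern(CombinerObjective::MustNotExistInLoop, true));
  // Other objectives are unaffected by loop membership.
  assert(!shouldSkipPattern(CombinerObjective::Default, true));
  return 0;
}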
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4771,6 +4771,106 @@
   }
   return Found;
 }
+
+/// getAddSub24Patterns - Find ADD/SUB instructions that have a 24-bit
+/// immediate moved into one of their operands and rewrite each into two
+/// ADD/SUB instructions with encodable 12-bit immediates.
+/// \param Root the current instruction to check if it is an ADD/SUB that can
+/// be combined
+/// \param [out] Patterns the list of patterns for the pattern evaluator
+/// \return true iff there is an ADD/SUB that can be combined
+static bool
+getAddSub24Patterns(MachineInstr &Root,
+                    SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  unsigned Opc = Root.getOpcode();
+  MachineBasicBlock &MBB = *Root.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  bool Found = false;
+
+  using MCP = MachineCombinerPattern;
+
+  auto MatchImm = [&](unsigned Imm, MCP Pat, MCP NPat) {
+    // Only check bits 23:16 (not 23:12): if the immediate fits in 16 bits, a
+    // single MOV plus an ADD is preferred over two chained ADD immediates.
+    if (!(Imm & ~0x00ffffff) && (Imm & 0x00ff0000) && (Imm & 0x00000fff)) {
+      Patterns.push_back(Pat);
+      return true;
+    }
+    if (!(-Imm & ~0x00ffffff) && (-Imm & 0x00ff0000) && (-Imm & 0x00000fff)) {
+      Patterns.push_back(NPat);
+      return true;
+    }
+    return false;
+  };
+
+  // Match (ADD/SUBW WN (MOVi32imm <24-bit>)) ->
+  //       (ADD/SUBW (ADD/SUBW WN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchW = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (SUBREG_TO_REG (MOVi32imm <24-bit>))) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXStR = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::SUBREG_TO_REG))
+      return false;
+    MachineInstr &SubToReg = *MRI.getUniqueVRegDef(AddSubOprd.getReg());
+    MachineOperand &SubToRegOprd = SubToReg.getOperand(2);
+    if (!canCombine(MBB, SubToRegOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(SubToRegOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (MOVi64imm <24-bit>)) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXM64 = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi64imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  switch (Opc) {
+  default:
+    break;
+  case AArch64::ADDWrr:
+    Found |= MatchW(1, MCP::ADDW_MOVi32imm_OP1, MCP::ADDW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::ADDW_MOVi32imm_OP2, MCP::ADDW_negMOVi32imm_OP2);
+    break;
+  case AArch64::ADDXrr:
+    Found |= MatchXM64(1, MCP::ADDX_MOVi64imm_OP1, MCP::ADDX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::ADDX_MOVi64imm_OP2, MCP::ADDX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::ADDX_StR_MOVi32imm_OP1,
+                       MCP::ADDX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::ADDX_StR_MOVi32imm_OP2,
+                       MCP::ADDX_StR_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBWrr:
+    Found |= MatchW(1, MCP::SUBW_MOVi32imm_OP1, MCP::SUBW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::SUBW_MOVi32imm_OP2, MCP::SUBW_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBXrr:
+    Found |= MatchXM64(1, MCP::SUBX_MOVi64imm_OP1, MCP::SUBX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::SUBX_MOVi64imm_OP2, MCP::SUBX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::SUBX_StR_MOVi32imm_OP1,
+                       MCP::SUBX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::SUBX_StR_MOVi32imm_OP2,
+                       MCP::SUBX_StR_negMOVi32imm_OP2);
+    break;
+  }
+  return Found;
+}
+
 /// Floating-Point Support

 /// Find instructions that can be turned into madd.
@@ -5094,6 +5194,8 @@
   // Integer patterns
   if (getMaddPatterns(Root, Patterns))
     return true;
+  if (getAddSub24Patterns(Root, Patterns))
+    return true;
   // Floating point patterns
   if (getFMULPatterns(Root, Patterns))
     return true;
@@ -5347,6 +5449,146 @@
   return MUL;
 }

+/// genAddSub24BitImm - Creates two (ADD|SUB)(W|X)ri instructions that add the
+/// high and low 12-bit halves, respectively, of a 24-bit immediate. Constrains
+/// the register class as needed. Adds the new instructions to the insert list
+/// and returns the move immediate instruction pointer so that the caller can
+/// add it to the delete list.
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param ImmInst is the MOVi(32|64)imm instruction
+/// \param IdxRootOpd is the index of the Root operand that is defined by the
+/// immediate (directly or via SUBREG_TO_REG)
+/// \param Imm is the immediate value, which uses at least 13 and at most
+/// 24 bits
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class of the (ADD|SUB)(W|X)ri operands
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be removed
+static MachineInstr *
+genAddSub24BitImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                  const TargetInstrInfo *TII, MachineInstr &Root,
+                  MachineInstr &ImmInst, unsigned IdxRootOpd, unsigned Imm,
+                  unsigned NewOpc, const TargetRegisterClass *RC,
+                  SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  unsigned ImmHi = (Imm >> 12) & 0x0fff, ImmLo = Imm & 0x0fff;
+  unsigned IdxOtherOpd = IdxRootOpd == 1 ? 2 : 1;
+  Register ResultReg = Root.getOperand(0).getReg();
+  Register ImmReg = Root.getOperand(IdxRootOpd).getReg();
+  bool ImmIsKill = Root.getOperand(IdxRootOpd).isKill();
+  Register SrcReg = Root.getOperand(IdxOtherOpd).getReg();
+  bool SrcIsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+  if (Register::isVirtualRegister(ResultReg))
+    MRI.constrainRegClass(ResultReg, RC);
+  if (Register::isVirtualRegister(ImmReg))
+    MRI.constrainRegClass(ImmReg, RC);
+  if (Register::isVirtualRegister(SrcReg))
+    MRI.constrainRegClass(SrcReg, RC);
+
+  MachineInstrBuilder MIB1 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ImmReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill))
+          .addImm(ImmHi)
+          .addImm(12);
+  MachineInstrBuilder MIB2 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ResultReg)
+          .addReg(ImmReg, getKillRegState(ImmIsKill))
+          .addImm(ImmLo)
+          .addImm(0);
+  InsInstrs.push_back(MIB1);
+  InsInstrs.push_back(MIB2);
+  return &ImmInst;
+}
+
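// Illustration only, not part of the patch: the ImmHi/ImmLo split computed by
// genAddSub24BitImm above, checked against the constants that appear in the
// MIR tests further down (MOVi32imm 1121757 becomes ADDWri ..., 273, 12
// followed by ADDWri ..., 3549, 0; MOVi32imm -1121757 takes the neg* pattern
// and produces the same split with SUBWri).
#include <cassert>

int main() {
  unsigned Imm = 1121757;                // 24-bit immediate from the tests
  unsigned ImmHi = (Imm >> 12) & 0x0fff; // 273, emitted with shift 12
  unsigned ImmLo = Imm & 0x0fff;         // 3549, emitted with shift 0
  assert(ImmHi == 273 && ImmLo == 3549);
  assert((ImmHi << 12) + ImmLo == Imm);

  // The neg* patterns negate first and flip ADD<->SUB, so adding -1121757 is
  // rewritten as subtracting 1121757.
  unsigned NegImm = static_cast<unsigned>(-1121757);
  assert(-NegImm == Imm);
  return 0;
}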
+/// genAddSubMovImm - Generate two ADD/SUB immediate instructions from an
+/// ADD/SUB instruction that has a 24-bit value moved into one of its operands.
+/// This reduces the final assembly size when the 24-bit immediate would
+/// otherwise have required two MOV immediate instructions.
+/// This function extracts the move immediate instruction and then delegates
+/// the work to genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi(32|64)imm N:<24-bit imm>
+/// V = (ADD|SUB)(W|X)rr Rn I
+/// ==> Tmp = (ADD|SUB)(W|X)ri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)(W|X)ri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param IdxRootOpd is the index of the operand that is defined by the
+/// MOVi(32|64)imm instruction
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class of the (ADD|SUB)(W|X)ri operands
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be removed
+static MachineInstr *
+genAddSubMovImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                const TargetInstrInfo *TII, MachineInstr &Root,
+                unsigned IdxRootOpd, unsigned NewOpc,
+                const TargetRegisterClass *RC, bool Negate,
+                SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  MachineInstr &ImmInst = *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm, NewOpc,
+                           RC, InsInstrs);
+}
+
+/// genAddSubStR - Generate two ADD/SUB immediate instructions from an ADD/SUB
+/// instruction that has a 24-bit value moved into one of its operands through
+/// an intermediate SUBREG_TO_REG step. This reduces the final assembly size
+/// when the 24-bit immediate would otherwise have required two MOV immediate
+/// instructions.
+/// This function extracts the SUBREG_TO_REG and move immediate instructions,
+/// deletes the SUBREG_TO_REG, then delegates the work to genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi32imm N:<24-bit imm>
+/// S = SUBREG_TO_REG I
+/// V = (ADD|SUB)Xrr Rn S
+/// ==> Tmp = (ADD|SUB)Xri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)Xri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)Xrr instruction
+/// \param IdxRootOpd is the index of the operand that has the SUBREG_TO_REG
+/// result
+/// \param NewOpc The opcode for the two (ADD|SUB)Xri instructions
+/// \param RC Register class of the (ADD|SUB)Xri operands
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)Xri instructions
+/// \param [out] DelInstrs is a vector that will contain the SUBREG_TO_REG
+/// instruction that could be removed
+/// \return the address of the MOVi32imm instruction that could be removed
+static MachineInstr *genAddSubStR(MachineFunction &MF, MachineRegisterInfo &MRI,
+                                  const TargetInstrInfo *TII,
+                                  MachineInstr &Root, unsigned IdxRootOpd,
+                                  unsigned NewOpc,
+                                  const TargetRegisterClass *RC, bool Negate,
+                                  SmallVectorImpl<MachineInstr *> &InsInstrs,
+                                  SmallVectorImpl<MachineInstr *> &DelInstrs) {
+  MachineInstr &SubToReg =
+      *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  MachineInstr &ImmInst = *MRI.getVRegDef(SubToReg.getOperand(2).getReg());
+  DelInstrs.push_back(&SubToReg);
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm, NewOpc,
+                           RC, InsInstrs);
+}
+
 /// When getMachineCombinerPatterns() finds potential patterns,
 /// this function generates the instructions that could replace the
 /// original code sequence
@@ -5535,6 +5777,103 @@
     break;
   }

+  case MachineCombinerPattern::ADDW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
&AArch64::GPR32spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBW_negMOVi32imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri, + &AArch64::GPR32spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBW_negMOVi32imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri, + &AArch64::GPR32spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBX_MOVi64imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBX_MOVi64imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBX_negMOVi64imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBX_negMOVi64imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::MULADDv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir @@ -0,0 +1,351 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +--- +name: reject_16bit +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: reject_16bit + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4369 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 4369 
+ %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: reject_16bit_neg +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: reject_16bit_neg + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -4369 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -4369 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: reject_16bit_X +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: reject_16bit_X + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4369 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], killed [[SUBREG_TO_REG]] + ; CHECK-NEXT: $x0 = COPY [[ADDXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 4369 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = ADDXrr %0, killed %2 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: reject_25bit +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: reject_25bit + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 17895697 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], killed [[MOVi32imm]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 17895697 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi_flip +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = ADDWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... 
+--- +name: addi_flip_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = ADDWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addl +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = ADDXrr %0, killed %2 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: addl_flip +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = ADDXrr killed %2, %0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: addl_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = ADDXrr %0, killed %1 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... +--- +name: addl_flip_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = ADDXrr killed %1, %0 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... + + +--- +name: subi +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = SUBWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... 
+--- +name: subi_flip +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = SUBWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subi_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = SUBWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subi_flip_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = SUBWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subl +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = SUBXrr %0, killed %2 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: subl_flip +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = SUBXrr killed %2, %0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: subl_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = SUBXrr %0, killed %1 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... 
+--- +name: subl_flip_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = SUBXrr killed %1, %0 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-imm-reject-loop.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-imm-reject-loop.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-imm-reject-loop.mir @@ -0,0 +1,123 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +# Check to ensure that an add/sub with a 24-bit immediate won't be turned into +# two addi/subi instructions when inside the loop. +# The important check is that the following MIR will still generate the following: +# [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 9961536 +# [[ADDWrr:%[0-9]+]]:gpr32 = nsw ADDWrr killed %13, killed [[MOVi32imm]] + + +--- | + define dso_local void @reject_inside_loop(i32 %n, i32* nocapture readonly %x, i32* noalias nocapture writeonly %y) local_unnamed_addr #0 { + entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv2 = phi i32* [ %scevgep3, %for.body ], [ %x, %for.body.preheader ] + %lsr.iv1 = phi i32* [ %scevgep, %for.body ], [ %y, %for.body.preheader ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ %wide.trip.count, %for.body.preheader ] + %0 = load i32, i32* %lsr.iv2, align 4, !tbaa !8 + %add = add nsw i32 %0, 9961536 + store i32 %add, i32* %lsr.iv1, align 4, !tbaa !8 + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %scevgep = getelementptr i32, i32* %lsr.iv1, i64 1 + %scevgep3 = getelementptr i32, i32* %lsr.iv2, i64 1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !12 + } + + !8 = !{!9, !9, i64 0} + !9 = !{!"int", !10, i64 0} + !10 = !{!"omnipotent char", !11, i64 0} + !11 = !{!"Simple C/C++ TBAA"} + !12 = distinct !{!12, !13, !14} + !13 = !{!"llvm.loop.mustprogress"} + !14 = !{!"llvm.loop.unroll.disable"} + +... 
+--- +name: reject_inside_loop +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: reject_inside_loop + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY2]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.for.body.preheader: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY2]], 0 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, killed [[ORRWrs]], %subreg.sub_32 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.for.cond.cleanup: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.for.body: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY1]], %bb.1, %7, %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64sp = PHI [[COPY]], %bb.1, %9, %bb.3 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr64sp = PHI [[SUBREG_TO_REG]], %bb.1, %11, %bb.3 + ; CHECK-NEXT: early-clobber %12:gpr64sp, %13:gpr32 = LDRWpost [[PHI]], 4 :: (load (s32) from %ir.lsr.iv2, !tbaa !0) + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 9961536 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = nsw ADDWrr killed %13, killed [[MOVi32imm]] + ; CHECK-NEXT: early-clobber %16:gpr64sp = STRWpost killed [[ADDWrr]], [[PHI1]], 4 :: (store (s32) into %ir.lsr.iv1, !tbaa !0) + ; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = nsw SUBSXri [[PHI2]], 1, 0, implicit-def dead $nzcv + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64all = COPY [[SUBSXri]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64all = COPY %16 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64all = COPY %12 + ; CHECK-NEXT: CBZX [[SUBSXri]], %bb.2 + ; CHECK-NEXT: B %bb.3 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $w0, $x1, $x2 + %9:gpr64 = COPY $x2 + %8:gpr64 = COPY $x1 + %7:gpr32common = COPY $w0 + %10:gpr32 = SUBSWri %7, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.for.body.preheader: + successors: %bb.3 + %11:gpr32 = ORRWrs $wzr, %7, 0 + %0:gpr64all = SUBREG_TO_REG 0, killed %11, %subreg.sub_32 + B %bb.3 + + bb.2.for.cond.cleanup: + RET_ReallyLR + + bb.3.for.body: + successors: %bb.2, %bb.3 + %1:gpr64sp = PHI %8, %bb.1, %6, %bb.3 + %2:gpr64sp = PHI %9, %bb.1, %5, %bb.3 + %3:gpr64sp = PHI %0, %bb.1, %4, %bb.3 + early-clobber %12:gpr64sp, %13:gpr32 = LDRWpost %1, 4 :: (load (s32) from %ir.lsr.iv2, !tbaa !8) + %14:gpr32 = MOVi32imm 9961536 + %15:gpr32 = nsw ADDWrr killed %13, killed %14 + early-clobber %16:gpr64sp = STRWpost killed %15, %2, 4 :: (store (s32) into %ir.lsr.iv1, !tbaa !8) + %17:gpr64 = nsw SUBSXri %3, 1, 0, implicit-def dead $nzcv + %4:gpr64all = COPY %17 + %5:gpr64all = COPY %16 + %6:gpr64all = COPY %12 + CBZX %17, %bb.2 + B %bb.3 +... 
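The addsub.ll changes below show the end result of the combine at the assembly level. For reference, a small C++ sketch of the source that the first pair of updated tests roughly corresponds to; the .ll file itself is the authoritative input, and the before/after sequences in the comments are copied from the CHECK lines.

#include <cstdint>

// Roughly the source for add_two_parts_imm_i64 / add_two_parts_imm_i32 in
// addsub.ll: a plain add of the 24-bit constant 11183445.
// Before the combine:            After the combine:
//   mov  w8, #42325                add x8, x0, #2730, lsl #12  // =11182080
//   movk w8, #170, lsl #16         add x0, x8, #1365
//   add  x0, x0, x8
int64_t add_two_parts_imm_i64(int64_t A) { return A + 11183445; }
int32_t add_two_parts_imm_i32(int32_t A) { return A + 11183445; }

int main() { return add_two_parts_imm_i64(0) == 11183445 ? 0 : 1; }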
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -152,9 +152,8 @@ define i64 @add_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = add i64 %a, 11183445 ret i64 %b @@ -163,9 +162,8 @@ define i32 @add_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, 11183445 ret i32 %b @@ -174,9 +172,8 @@ define i64 @add_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret %b = add i64 %a, -11183445 ret i64 %b @@ -185,9 +182,8 @@ define i32 @add_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, -11183445 ret i32 %b @@ -196,9 +192,8 @@ define i64 @sub_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, 11183445 ret i64 %b @@ -207,9 +202,8 @@ define i32 @sub_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, 11183445 ret i32 %b @@ -218,9 +212,8 @@ define i64 @sub_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, -11183445 ret i64 %b @@ -229,9 +222,8 @@ define i32 @sub_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, -11183445 ret i32 %b diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -214,10 +214,9 @@ ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #14464 -; CHECK-NEXT: movk w10, #1, 
lsl #16 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x9, x9, #19, lsl #12 // =77824 +; CHECK-NEXT: add x9, x9, #2176 ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.ge .LBB4_2 ; CHECK-NEXT: .LBB4_1: // %while_body
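The large-offset-gep.ll change follows the same pattern for address arithmetic: the constant part of a large GEP offset is folded into two ADD immediates instead of being materialized with MOV/MOVK and added as a register. A standalone check, not part of the patch, of the two decompositions of the byte offset 80000 seen in the CHECK lines above:

#include <cassert>

int main() {
  const unsigned Offset = 80000; // byte offset added to the loaded base in test5
  // Before: materialize the offset, then add it as a register operand:
  //   mov  w10, #14464
  //   movk w10, #1, lsl #16        ; 14464 + (1 << 16) == 80000
  assert(14464 + (1u << 16) == Offset);
  // After: fold it into two ADD (immediate) instructions on the address:
  //   add  x9, x9, #19, lsl #12    ; == 77824
  //   add  x9, x9, #2176           ; 77824 + 2176 == 80000
  assert((19u << 12) + 2176 == Offset);
  return 0;
}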