diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -47,6 +47,31 @@
   MULSUBX_OP2,
   MULADDXI_OP1,
   MULSUBXI_OP1,
+  // 24-bit imm add/sub patterns matched by the AArch64 machine combiner.
+  ADDW_MOVi32imm_OP1,
+  ADDW_MOVi32imm_OP2,
+  ADDW_negMOVi32imm_OP1,
+  ADDW_negMOVi32imm_OP2,
+  ADDX_StR_MOVi32imm_OP1,
+  ADDX_StR_MOVi32imm_OP2,
+  ADDX_StR_negMOVi32imm_OP1,
+  ADDX_StR_negMOVi32imm_OP2,
+  ADDX_MOVi64imm_OP1,
+  ADDX_MOVi64imm_OP2,
+  ADDX_negMOVi64imm_OP1,
+  ADDX_negMOVi64imm_OP2,
+  SUBW_MOVi32imm_OP1,
+  SUBW_MOVi32imm_OP2,
+  SUBW_negMOVi32imm_OP1,
+  SUBW_negMOVi32imm_OP2,
+  SUBX_StR_MOVi32imm_OP1,
+  SUBX_StR_MOVi32imm_OP2,
+  SUBX_StR_negMOVi32imm_OP1,
+  SUBX_StR_negMOVi32imm_OP2,
+  SUBX_MOVi64imm_OP1,
+  SUBX_MOVi64imm_OP2,
+  SUBX_negMOVi64imm_OP1,
+  SUBX_negMOVi64imm_OP2,
   // NEON integers vectors
   MULADDv8i8_OP1,
   MULADDv8i8_OP2,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4771,6 +4771,105 @@
   }
   return Found;
 }
+
+/// getAddSub24Patterns - Find ADD/SUB instructions that have a 24-bit
+/// immediate moved into one of their operands, so they can be rewritten as
+/// two ADD/SUB instructions with 12-bit encoded immediates.
+/// \param Root the instruction to check for a combinable ADD/SUB
+/// \param [out] Patterns the list of patterns for the pattern evaluator
+/// \return true iff there is an ADD/SUB that can be combined
+static bool
+getAddSub24Patterns(MachineInstr &Root,
+                    SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  unsigned Opc = Root.getOpcode();
+  MachineBasicBlock &MBB = *Root.getParent();
+  bool Found = false;
+
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  using MCP = MachineCombinerPattern;
+
+  auto MatchImm = [&](unsigned Imm, MCP Pat, MCP NPat) {
+    if (!(Imm & ~0x00ffffff) && (Imm & 0x00fff000) && (Imm & 0x00000fff)) {
+      Patterns.push_back(Pat);
+      return true;
+    }
+    if (!(-Imm & ~0x00ffffff) && (-Imm & 0x00fff000) && (-Imm & 0x00000fff)) {
+      Patterns.push_back(NPat);
+      return true;
+    }
+    return false;
+  };
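+
+  // An immediate qualifies only if it fits in 24 bits and both 12-bit halves
+  // are non-zero; if either half were zero, a single ADD/SUB with a
+  // (possibly "lsl #12" shifted) 12-bit immediate would already encode it.
+  // For example, 1121757 (0x111ddd) splits into 273 (0x111) shifted left by
+  // 12 plus 3549 (0xddd), as the MIR tests below check.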
+
+  // Match (ADD/SUBW WN (MOVi32imm <24-bit>)) ->
+  //       (ADD/SUBW (ADD/SUBW WN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchW = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (SUBREG_TO_REG (MOVi32imm <24-bit>))) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXStR = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::SUBREG_TO_REG))
+      return false;
+    MachineInstr &SubToReg = *MRI.getUniqueVRegDef(AddSubOprd.getReg());
+    MachineOperand &SubToRegOprd = SubToReg.getOperand(2);
+    if (!canCombine(MBB, SubToRegOprd, AArch64::MOVi32imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(SubToRegOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  // Match (ADD/SUBX XN (MOVi64imm <24-bit>)) ->
+  //       (ADD/SUBX (ADD/SUBX XN <12-bit> shift.12) <12-bit> shift.0)
+  auto MatchXM64 = [&](unsigned Oprd, MCP Pat, MCP NPat) {
+    MachineOperand &AddSubOprd = Root.getOperand(Oprd);
+    if (!canCombine(MBB, AddSubOprd, AArch64::MOVi64imm))
+      return false;
+    unsigned Imm =
+        MRI.getUniqueVRegDef(AddSubOprd.getReg())->getOperand(1).getImm();
+    return MatchImm(Imm, Pat, NPat);
+  };
+
+  switch (Opc) {
+  default:
+    break;
+  case AArch64::ADDWrr:
+    Found |= MatchW(1, MCP::ADDW_MOVi32imm_OP1, MCP::ADDW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::ADDW_MOVi32imm_OP2, MCP::ADDW_negMOVi32imm_OP2);
+    break;
+  case AArch64::ADDXrr:
+    Found |= MatchXM64(1, MCP::ADDX_MOVi64imm_OP1, MCP::ADDX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::ADDX_MOVi64imm_OP2, MCP::ADDX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::ADDX_StR_MOVi32imm_OP1,
+                       MCP::ADDX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::ADDX_StR_MOVi32imm_OP2,
+                       MCP::ADDX_StR_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBWrr:
+    Found |= MatchW(1, MCP::SUBW_MOVi32imm_OP1, MCP::SUBW_negMOVi32imm_OP1);
+    Found |= MatchW(2, MCP::SUBW_MOVi32imm_OP2, MCP::SUBW_negMOVi32imm_OP2);
+    break;
+  case AArch64::SUBXrr:
+    Found |= MatchXM64(1, MCP::SUBX_MOVi64imm_OP1, MCP::SUBX_negMOVi64imm_OP1);
+    Found |= MatchXM64(2, MCP::SUBX_MOVi64imm_OP2, MCP::SUBX_negMOVi64imm_OP2);
+    Found |= MatchXStR(1, MCP::SUBX_StR_MOVi32imm_OP1,
+                       MCP::SUBX_StR_negMOVi32imm_OP1);
+    Found |= MatchXStR(2, MCP::SUBX_StR_MOVi32imm_OP2,
+                       MCP::SUBX_StR_negMOVi32imm_OP2);
+    break;
+  }
+  return Found;
+}
+
 /// Floating-Point Support

 /// Find instructions that can be turned into madd.
@@ -5094,6 +5193,8 @@
   // Integer patterns
   if (getMaddPatterns(Root, Patterns))
     return true;
+  if (getAddSub24Patterns(Root, Patterns))
+    return true;
   // Floating point patterns
   if (getFMULPatterns(Root, Patterns))
     return true;
@@ -5347,6 +5448,146 @@
   return MUL;
 }
+
+/// genAddSub24BitImm - Creates two (ADD|SUB)(W|X)ri instructions that take
+/// the high and low 12 bits respectively of a 24-bit immediate. Constrains
+/// the register class as needed. Adds the new instructions to the insert list
+/// and returns the move immediate instruction pointer so that the caller can
+/// add it to the delete list.
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param ImmInst is the MOVi(32|64)imm instruction
+/// \param IdxRootOpd is the index of the Root operand that consumes the
+/// materialized immediate
+/// \param Imm is the immediate value, which uses at least 13 and at most 24
+/// bits
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class for the operands of the (ADD|SUB)(W|X)ri
+/// instructions
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be removed
+static MachineInstr *
+genAddSub24BitImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                  const TargetInstrInfo *TII, MachineInstr &Root,
+                  MachineInstr &ImmInst, unsigned IdxRootOpd, unsigned Imm,
+                  unsigned NewOpc, const TargetRegisterClass *RC,
+                  SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  unsigned ImmHi = (Imm >> 12) & 0x0fff, ImmLo = Imm & 0x0fff;
+  unsigned IdxOtherOpd = IdxRootOpd == 1 ? 2 : 1;
+  Register ResultReg = Root.getOperand(0).getReg();
+  Register ImmReg = Root.getOperand(IdxRootOpd).getReg();
+  bool ImmIsKill = Root.getOperand(IdxRootOpd).isKill();
+  Register SrcReg = Root.getOperand(IdxOtherOpd).getReg();
+  bool SrcIsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+  if (Register::isVirtualRegister(ResultReg))
+    MRI.constrainRegClass(ResultReg, RC);
+  if (Register::isVirtualRegister(ImmReg))
+    MRI.constrainRegClass(ImmReg, RC);
+  if (Register::isVirtualRegister(SrcReg))
+    MRI.constrainRegClass(SrcReg, RC);
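+
+  // ImmReg, which held the materialized immediate, is reused for the
+  // intermediate value: MIB1 applies the high half (ImmHi, shifted left by
+  // 12) to SrcReg, and MIB2 applies the low half (ImmLo) to produce the final
+  // result in ResultReg.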
+  MachineInstrBuilder MIB1 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ImmReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill))
+          .addImm(ImmHi)
+          .addImm(12);
+  MachineInstrBuilder MIB2 =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(NewOpc), ResultReg)
+          .addReg(ImmReg, getKillRegState(ImmIsKill))
+          .addImm(ImmLo)
+          .addImm(0);
+  InsInstrs.push_back(MIB1);
+  InsInstrs.push_back(MIB2);
+  return &ImmInst;
+}
+
+/// genAddSubMovImm - Generate two ADD/SUB immediate instructions from an
+/// ADD/SUB instruction that has a 24-bit value moved into one of its
+/// operands. This shrinks the final assembly when materializing the 24-bit
+/// immediate would otherwise require two move-immediate instructions.
+/// This function extracts the move immediate instruction, then delegates the
+/// work to genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi(32|64)imm N:<24-bit imm>
+/// V = (ADD|SUB)(W|X)rr Rn I
+/// ==> Tmp = (ADD|SUB)(W|X)ri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)(W|X)ri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param IdxRootOpd is the index of the operand that has the MOVi(32|64)imm
+/// result
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class for the operands of the (ADD|SUB)(W|X)ri
+/// instructions
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \return the address of the MOVi(32|64)imm instruction that could be removed
+static MachineInstr *
+genAddSubMovImm(MachineFunction &MF, MachineRegisterInfo &MRI,
+                const TargetInstrInfo *TII, MachineInstr &Root,
+                unsigned IdxRootOpd, unsigned NewOpc,
+                const TargetRegisterClass *RC, bool Negate,
+                SmallVectorImpl<MachineInstr *> &InsInstrs) {
+  MachineInstr &ImmInst =
+      *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm, NewOpc,
+                           RC, InsInstrs);
+}
+
+/// genAddSubStR - Generate two ADD/SUB immediate instructions from an ADD/SUB
+/// instruction that has a 24-bit value moved into one of its operands through
+/// an intermediate SUBREG_TO_REG step. This shrinks the final assembly when
+/// materializing the 24-bit immediate would otherwise require two
+/// move-immediate instructions.
+/// This function extracts the SUBREG_TO_REG and move immediate instructions,
+/// marks the SUBREG_TO_REG for deletion, then delegates the work to
+/// genAddSub24BitImm.
+/// \example
+/// \code
+/// I = MOVi32imm N:<24-bit imm>
+/// S = SUBREG_TO_REG I
+/// V = (ADD|SUB)Xrr Rn S
+/// ==> Tmp = (ADD|SUB)Xri Rn N:<23:12> lsl.12
+/// ==> V = (ADD|SUB)Xri Tmp N:<11:0> lsl.0
+/// \endcode
+/// \param MF Containing MachineFunction
+/// \param MRI Register information
+/// \param TII Target instruction information
+/// \param Root is the (ADD|SUB)(W|X)rr instruction
+/// \param IdxRootOpd is the index of the operand that has the SUBREG_TO_REG
+/// result
+/// \param NewOpc The opcode for the two (ADD|SUB)(W|X)ri instructions
+/// \param RC Register class for the operands of the (ADD|SUB)(W|X)ri
+/// instructions
+/// \param Negate is true if the immediate must be negated to fit in 24 bits
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated (ADD|SUB)(W|X)ri instructions
+/// \param [out] DelInstrs is a vector that will contain the SUBREG_TO_REG
+/// instruction that could be removed
+/// \return the address of the MOVi(32|64)imm instruction that could be removed
+static MachineInstr *genAddSubStR(MachineFunction &MF, MachineRegisterInfo &MRI,
+                                  const TargetInstrInfo *TII,
+                                  MachineInstr &Root, unsigned IdxRootOpd,
+                                  unsigned NewOpc,
+                                  const TargetRegisterClass *RC, bool Negate,
+                                  SmallVectorImpl<MachineInstr *> &InsInstrs,
+                                  SmallVectorImpl<MachineInstr *> &DelInstrs) {
+  MachineInstr &SubToReg =
+      *MRI.getVRegDef(Root.getOperand(IdxRootOpd).getReg());
+  MachineInstr &ImmInst = *MRI.getVRegDef(SubToReg.getOperand(2).getReg());
+  DelInstrs.push_back(&SubToReg);
+  unsigned Imm = ImmInst.getOperand(1).getImm();
+  if (Negate)
+    Imm = -Imm;
+  return genAddSub24BitImm(MF, MRI, TII, Root, ImmInst, IdxRootOpd, Imm, NewOpc,
+                           RC, InsInstrs);
+}
+
 /// When getMachineCombinerPatterns() finds potential patterns,
 /// this function generates the instructions that could replace the
 /// original code sequence
@@ -5535,6 +5776,103 @@
     break;
   }
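+  // 24-bit immediate patterns: the OP1/OP2 suffix records which Root operand
+  // consumed the immediate, and the "neg" variants switch between ADD and SUB
+  // because the immediate only fits in 24 bits after negation.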
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDW_negMOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_MOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri,
+                          &AArch64::GPR64spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::ADDX_negMOVi64imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri,
+                          &AArch64::GPR64spRegClass, true, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP1:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBWri,
+                          &AArch64::GPR32spRegClass, false, InsInstrs);
+    break;
+  case MachineCombinerPattern::SUBW_MOVi32imm_OP2:
+    MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBWri,
&AArch64::GPR32spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBW_negMOVi32imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDWri, + &AArch64::GPR32spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBW_negMOVi32imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDWri, + &AArch64::GPR32spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBX_MOVi64imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBX_MOVi64imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs); + break; + case MachineCombinerPattern::SUBX_negMOVi64imm_OP1: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::SUBX_negMOVi64imm_OP2: + MUL = genAddSubMovImm(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs); + break; + case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_MOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::ADDX_StR_negMOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_MOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::SUBXri, + &AArch64::GPR64spRegClass, false, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP1: + MUL = genAddSubStR(MF, MRI, TII, Root, 1, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::SUBX_StR_negMOVi32imm_OP2: + MUL = genAddSubStR(MF, MRI, TII, Root, 2, AArch64::ADDXri, + &AArch64::GPR64spRegClass, true, InsInstrs, DelInstrs); + break; + case MachineCombinerPattern::MULADDv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-addsub-24bit-imm.mir @@ -0,0 +1,281 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +--- +name: addi +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = 
MOVi32imm 1121757 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi_flip +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = ADDWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = ADDWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addi_flip_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = ADDWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: addl +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = ADDXrr %0, killed %2 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: addl_flip +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = ADDXrr killed %2, %0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: addl_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = ADDXrr %0, killed %1 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... 
+--- +name: addl_flip_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = ADDXrr killed %1, %0 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... + + +--- +name: subi +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = SUBWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subi_flip +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBWri1:%[0-9]+]]:gpr32common = SUBWri killed [[SUBWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[SUBWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = SUBWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subi_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = SUBWrr %0, killed %1 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subi_flip_negate +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: subi_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri killed [[ADDWri]], 3549, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm -1121757 + %2:gpr32 = SUBWrr killed %1, %0 + $w0 = COPY %2 + RET_ReallyLR implicit $w0 +... +--- +name: subl +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = SUBXrr %0, killed %2 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... 
+--- +name: subl_flip +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_flip + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64common = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri killed [[SUBXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[SUBXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = SUBXrr killed %2, %0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: subl_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = SUBXrr %0, killed %1 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... +--- +name: subl_flip_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: subl_flip_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri killed [[ADDXri]], 3549, 0 + ; CHECK-NEXT: $x0 = COPY [[ADDXri1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = SUBXrr killed %1, %0 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -152,9 +152,8 @@ define i64 @add_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = add i64 %a, 11183445 ret i64 %b @@ -163,9 +162,8 @@ define i32 @add_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, 11183445 ret i32 %b @@ -174,9 +172,8 @@ define i64 @add_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret %b = add i64 %a, -11183445 ret i64 %b @@ -185,9 +182,8 @@ define i32 @add_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, -11183445 ret i32 %b @@ -196,9 +192,8 @@ define i64 @sub_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; 
CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, 11183445 ret i64 %b @@ -207,9 +202,8 @@ define i32 @sub_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, 11183445 ret i32 %b @@ -218,9 +212,8 @@ define i64 @sub_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, -11183445 ret i64 %b @@ -229,9 +222,8 @@ define i32 @sub_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, -11183445 ret i32 %b diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -216,9 +216,9 @@ define zeroext i1 @test16_2(i16 zeroext %x) align 2 { ; CHECK-LABEL: test16_2: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #16882 +; CHECK-NEXT: add w8, w0, #4, lsl #12 ; =16384 ; CHECK-NEXT: mov w9, #40700 -; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: add w8, w8, #498 ; CHECK-NEXT: cmp w9, w8, uxth ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: ret @@ -252,9 +252,9 @@ define zeroext i1 @test16_4(i16 zeroext %x) align 2 { ; CHECK-LABEL: test16_4: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #29985 +; CHECK-NEXT: add w8, w0, #7, lsl #12 ; =28672 ; CHECK-NEXT: mov w9, #15676 -; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: add w8, w8, #1313 ; CHECK-NEXT: cmp w9, w8, uxth ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret @@ -288,10 +288,10 @@ define zeroext i1 @test16_6(i16 zeroext %x) align 2 { ; CHECK-LABEL: test16_6: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #-32194 -; CHECK-NEXT: mov w9, #24320 -; CHECK-NEXT: add w8, w0, w8 -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: sub w9, w0, #7, lsl #12 ; =28672 +; CHECK-NEXT: mov w8, #24320 +; CHECK-NEXT: sub w9, w9, #3522 +; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: ret entry: @@ -307,9 +307,9 @@ define zeroext i1 @test16_7(i16 zeroext %x) align 2 { ; CHECK-LABEL: test16_7: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #9272 +; CHECK-NEXT: add w8, w0, #2, lsl #12 ; =8192 ; CHECK-NEXT: mov w9, #22619 -; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: add w8, w8, #1080 ; CHECK-NEXT: cmp w9, w8, uxth ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll --- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll +++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: ldr x8, [x8, :got_lo12:g] ; CHECK-NEXT: ldrh w8, [x8] ; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: mov w9, #65535 -; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: add w8, w8, #15, lsl #12 // =61440 +; CHECK-NEXT: add w8, w8, #4095 ; CHECK-NEXT: and w0, w8, w8, lsr #16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll 
b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll --- a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll @@ -43,8 +43,8 @@ define i32* @test_array4(i32* %a) { ; CHECK-LABEL: test_array4: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov x8, #4104 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #1, lsl #12 ; =4096 +; CHECK-NEXT: add x0, x8, #8 ; CHECK-NEXT: ret %1 = getelementptr inbounds i32, i32* %a, i64 1026 ret i32* %1 diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -497,12 +497,11 @@ define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, <16 x i32>* %ptr) { ; CHECK-LABEL: test_stnp_v16i32_invalid_offset: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #32032 -; CHECK-NEXT: mov w9, #32000 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: stnp q2, q3, [x8] -; CHECK-NEXT: stnp q0, q1, [x9] +; CHECK-NEXT: add x8, x0, #7, lsl #12 ; =28672 +; CHECK-NEXT: add x9, x8, #3360 +; CHECK-NEXT: add x8, x8, #3328 +; CHECK-NEXT: stnp q2, q3, [x9] +; CHECK-NEXT: stnp q0, q1, [x8] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -243,35 +243,35 @@ ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] -; CHECK-NEXT: mov w9, #17097 +; CHECK-NEXT: smov w9, v0.h[2] ; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: movk w9, #45590, lsl #16 -; CHECK-NEXT: mov w11, #32767 -; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: mov w8, #17097 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: add w11, w10, w11 +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: add w12, w10, #7, lsl #12 // =28672 ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: csel w11, w11, w10, lt -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: and w11, w11, #0xffff8000 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: mov w11, #47143 -; CHECK-NEXT: add w9, w13, w9, lsr #31 +; CHECK-NEXT: add w12, w12, #4095 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: csel w12, w12, w10, lt +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: and w12, w12, #0xffff8000 +; CHECK-NEXT: asr w13, w8, #4 +; CHECK-NEXT: sub w10, w10, w12 +; CHECK-NEXT: mov w12, #47143 +; CHECK-NEXT: add w8, w13, w8, lsr #31 ; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: movk w11, #24749, lsl #16 +; CHECK-NEXT: movk w12, #24749, lsl #16 ; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: smull x9, w12, w11 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: smull x9, w11, w12 ; CHECK-NEXT: lsr x10, x9, #63 ; CHECK-NEXT: asr x9, x9, #43 ; CHECK-NEXT: add w9, w9, w10 ; CHECK-NEXT: mov w10, #5423 ; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: msub w8, w9, w10, w11 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ 
-7,9 +7,9 @@ ; CHECK-LABEL: test1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #40000 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x9, x9, #9, lsl #12 // =36864 +; CHECK-NEXT: add x9, x9, #3136 ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.ge .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %while_body @@ -47,17 +47,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cbz x0, .LBB1_3 ; CHECK-NEXT: // %bb.1: // %while_cond.preheader -; CHECK-NEXT: mov w9, #40000 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864 +; CHECK-NEXT: mov w9, wzr +; CHECK-NEXT: add x8, x8, #3136 +; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.ge .LBB1_3 ; CHECK-NEXT: .LBB1_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: add w10, w9, #1 +; CHECK-NEXT: stp w10, w9, [x8] +; CHECK-NEXT: mov w9, w10 +; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.lt .LBB1_2 ; CHECK-NEXT: .LBB1_3: // %while_end ; CHECK-NEXT: ret @@ -86,20 +86,20 @@ ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csel x9, x1, x0, ne -; CHECK-NEXT: cbz x9, .LBB2_3 +; CHECK-NEXT: csel x8, x1, x0, ne +; CHECK-NEXT: cbz x8, .LBB2_3 ; CHECK-NEXT: // %bb.1: // %while_cond.preheader -; CHECK-NEXT: mov w10, #40000 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: cmp w8, w3 +; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864 +; CHECK-NEXT: mov w9, wzr +; CHECK-NEXT: add x8, x8, #3136 +; CHECK-NEXT: cmp w9, w3 ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: .LBB2_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 -; CHECK-NEXT: cmp w8, w3 +; CHECK-NEXT: add w10, w9, #1 +; CHECK-NEXT: stp w10, w9, [x8] +; CHECK-NEXT: mov w9, w10 +; CHECK-NEXT: cmp w9, w3 ; CHECK-NEXT: b.lt .LBB2_2 ; CHECK-NEXT: .LBB2_3: // %while_end ; CHECK-NEXT: ret @@ -141,16 +141,14 @@ ; CHECK-NEXT: .cfi_personality 0, __FrameHandler ; CHECK-NEXT: .cfi_lsda 0, .Lexception0 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: mov w20, wzr -; CHECK-NEXT: mov w21, #40000 ; CHECK-NEXT: .LBB3_1: // %while_cond ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: .Ltmp0: @@ -158,8 +156,9 @@ ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: // %bb.2: // %while_cond_x.split ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: add x8, x0, x21 +; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864 ; CHECK-NEXT: cmp w20, w19 +; CHECK-NEXT: add x8, x8, #3136 ; CHECK-NEXT: str wzr, [x8] ; CHECK-NEXT: b.ge .LBB3_4 ; CHECK-NEXT: // %bb.3: // %while_body @@ -170,7 +169,7 @@ ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_4: // %while_end ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_5: // %cleanup ; CHECK-NEXT: .Ltmp2: @@ -214,10 +213,9 @@ ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #14464 -; CHECK-NEXT: movk w10, #1, lsl #16 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x9, x9, #19, lsl #12 // =77824 +; CHECK-NEXT: add x9, x9, #2176 ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.ge .LBB4_2 ; CHECK-NEXT: .LBB4_1: // %while_body