Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -85,11 +85,21 @@ /// Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare. /// It holds the register values, the scale value and the displacement. +/// It also holds a descriptor for the expression used to calculate the address +/// from the operands. struct ExtAddrMode { + enum class Formula { + Basic = 0, // BaseReg + ScaledReg * Scale + Displacement + SExtScaledReg = 1, // BaseReg + sext(ScaledReg) * Scale + Displacement + ZExtScaledReg = 2 // BaseReg + zext(ScaledReg) * Scale + Displacement + }; + Register BaseReg; Register ScaledReg; - int64_t Scale; - int64_t Displacement; + int64_t Scale = 0; + int64_t Displacement = 0; + Formula Form = Formula::Basic; + ExtAddrMode() = default; }; //--------------------------------------------------------------------------- @@ -1447,6 +1457,26 @@ return std::nullopt; } + /// Check if it's possible and beneficial to fold the addressing computation + /// `AddrI` into the addressing mode of the load/store instruction `MemI`. The + /// memory instruction is a user of the virtual register `Reg`, which in turn + /// is the ultimate destination of zero or more COPY instructions from the + /// output register of `AddrI`. + /// Return the addressing mode after folding in `AM`. + virtual bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const { + return false; + } + + /// Emit a load/store instruction with the same value register as `MemI`, but + /// using the address from `AM`. The addressing mode must have been obtained + /// from `canFoldIntoAddrMode` for the same memory instruction. 
+ virtual MachineInstr *emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const { + llvm_unreachable("target did not implement emitLdStWithAddr()"); + } + /// Returns true if MI's Def is NullValueReg, and the MI /// does not change the Zero value. i.e. cases such as rax = shr rax, X where /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this Index: llvm/include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- llvm/include/llvm/CodeGen/TargetPassConfig.h +++ llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -130,6 +130,11 @@ /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge = true; + /// Enable sinking of instructions in MachineSink where a computation can be + /// folded into the addressing mode of a memory load/store instruction or + /// replace a copy. + bool EnableSinkAndFold = false; + /// Require processing of functions such that callees are generated before /// callers. 
bool RequireCodeGenSCCOrder = false; @@ -176,6 +181,9 @@ bool getEnableTailMerge() const { return EnableTailMerge; } void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } + bool getEnableSinkAndFold() const { return EnableSinkAndFold; } + void setEnableSinkAndFold(bool Enable) { setOpt(EnableSinkAndFold, Enable); } + bool requiresCodeGenSCCOrder() const { return RequireCodeGenSCCOrder; } void setRequiresCodeGenSCCOrder(bool Enable = true) { setOpt(RequireCodeGenSCCOrder, Enable); Index: llvm/lib/CodeGen/ImplicitNullChecks.cpp =================================================================== --- llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -372,7 +372,7 @@ if (!MI.mayLoadOrStore() || MI.isPredicable()) return SR_Unsuitable; auto AM = TII->getAddrModeFromMemoryOp(MI, TRI); - if (!AM) + if (!AM || AM->Form != ExtAddrMode::Formula::Basic) return SR_Unsuitable; auto AddrMode = *AM; const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg; Index: llvm/lib/CodeGen/MachineSink.cpp =================================================================== --- llvm/lib/CodeGen/MachineSink.cpp +++ llvm/lib/CodeGen/MachineSink.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" @@ -115,6 +116,7 @@ namespace { class MachineSinking : public MachineFunctionPass { + const TargetSubtargetInfo *STI = nullptr; const TargetInstrInfo *TII = nullptr; const TargetRegisterInfo *TRI = nullptr; MachineRegisterInfo *MRI = nullptr; // Machine register information @@ -165,7 +167,10 @@ StoreInstrCache; /// Cached BB's register pressure. 
- std::map> CachedRegisterPressure; + std::map> + CachedRegisterPressure; + + bool EnableSinkAndFold; public: static char ID; // Pass identification @@ -187,6 +192,7 @@ AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); + AU.addRequired(); } void releaseMemory() override { @@ -246,11 +252,17 @@ bool PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB); + bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB); + SmallVector & GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; - std::vector &getBBRegisterPressure(MachineBasicBlock &MBB); + std::vector &getBBRegisterPressure(const MachineBasicBlock &MBB); + + bool registerPressureSetExceedsLimit(unsigned NRegs, + const TargetRegisterClass *RC, + const MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -338,6 +350,225 @@ return true; } +bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, + MachineBasicBlock *MBB) { + if (MI.isCopy() || MI.mayLoadOrStore() || + MI.getOpcode() == TargetOpcode::REG_SEQUENCE) + return false; + + // Don't sink instructions that the target prefers not to sink. + if (!TII->shouldSink(MI)) + return false; + + // Check if it's safe to move the instruction. + bool SawStore = true; + if (!MI.isSafeToMove(AA, SawStore)) + return false; + + // Convergent operations may not be made control-dependent on additional + // values. + if (MI.isConvergent()) + return false; + + // Don't sink defs/uses of hard registers or if the instruction defines more + // than one register. + // Don't sink more than two register uses - it'll cover most of the cases and + // greatly simplifies the register pressure checks. 
+ Register DefReg; + Register UsedRegA, UsedRegB; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() || + MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() || + MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask()) + continue; + if (!MO.isReg()) + return false; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + if (Reg.isVirtual()) { + if (MO.isDef()) { + if (DefReg) + return false; + DefReg = Reg; + continue; + } + + if (UsedRegA == 0) + UsedRegA = Reg; + else if (UsedRegB == 0) + UsedRegB = Reg; + else + return false; + continue; + } + + if (Reg.isPhysical() && + (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) + continue; + + return false; + } + + // Scan uses of the destination register. Every use, except the last, must be + // a copy, with a chain of copies terminating with either a copy into a hard + // register, or a load/store instruction where the use is part of the + // address (*not* the stored value). + using SinkInfo = std::pair; + SmallVector SinkInto; + SmallVector Worklist; + + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + const TargetRegisterClass *RCA = + UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA); + const TargetRegisterClass *RCB = + UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB); + + Worklist.push_back(DefReg); + while (!Worklist.empty()) { + Register Reg = Worklist.pop_back_val(); + + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + ExtAddrMode MaybeAM; + MachineInstr &UseInst = *MO.getParent(); + if (UseInst.isCopy()) { + Register DstReg; + if (const MachineOperand &O = UseInst.getOperand(0); O.isReg()) + DstReg = O.getReg(); + if (DstReg == 0) + return false; + if (DstReg.isVirtual()) { + Worklist.push_back(DstReg); + continue; + } + // If we are going to replace a copy, the original instruction must be + // as cheap as a copy. 
+ if (!TII->isAsCheapAsAMove(MI)) + return false; + // The hard register must be in the register class of the original + // instruction's destination register. + if (!RC->contains(DstReg)) + return false; + } else if (UseInst.mayLoadOrStore()) { + ExtAddrMode AM; + if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM)) + return false; + MaybeAM = AM; + } else { + return false; + } + + if (UseInst.getParent() != MI.getParent()) { + // If the register class of source of the copy is a superset of any of + // the register classes of the operands of the materialized instruction + // don't consider that live range extended. + const TargetRegisterClass *RCS = + MRI->getRegClass(UseInst.getOperand(1).getReg()); + if (RCA && RCA->hasSuperClassEq(RCS)) + RCA = nullptr; + else if (RCB && RCB->hasSuperClassEq(RCS)) + RCB = nullptr; + if (RCA || RCB) { + if (RCA == nullptr) { + RCA = RCB; + RCB = nullptr; + } + + unsigned NRegs = !!RCA + !!RCB; + if (RCA == RCB) + RCB = nullptr; + + // Check we don't exceed register pressure at the destination. + const MachineBasicBlock &MBB = *UseInst.getParent(); + if (RCB == nullptr) { + if (registerPressureSetExceedsLimit(NRegs, RCA, MBB)) + return false; + } else if (registerPressureSetExceedsLimit(1, RCA, MBB) || + registerPressureSetExceedsLimit(1, RCB, MBB)) { + return false; + } + } + } + + SinkInto.emplace_back(&UseInst, MaybeAM); + } + } + + if (SinkInto.empty()) + return false; + + // Now we know we can fold the instruction in all its users. + if (UsedRegA) + MRI->clearKillFlags(UsedRegA); + if (UsedRegB) + MRI->clearKillFlags(UsedRegB); + + for (auto &[SinkDst, MaybeAM] : SinkInto) { + MachineInstr *New = nullptr; + LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into"; + SinkDst->dump();); + if (SinkDst->isCopy()) { + // Sink a copy of the instruction, replacing a COPY instruction. 
+ MachineBasicBlock::iterator InsertPt = SinkDst->getIterator(); + Register DstReg = SinkDst->getOperand(0).getReg(); + TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI); + // If the original instruction did not have source location, reuse the + // one from the COPY. + New = &*std::prev(InsertPt); + if (const DebugLoc &NewLoc = New->getDebugLoc(); !NewLoc) + New->setDebugLoc(SinkDst->getDebugLoc()); + // Sink DBG_VALUEs, which refer to the original instruction's destination + // (DefReg). + MachineBasicBlock &SinkMBB = *SinkDst->getParent(); + auto &DbgUsers = SeenDbgUsers[DefReg]; + for (auto &U : DbgUsers) { + MachineInstr *DbgMI = U.getPointer(); + if (U.getInt()) + continue; + MachineInstr *NewDbgMI = SinkDst->getMF()->CloneMachineInstr(DbgMI); + NewDbgMI->getOperand(0).setReg(DstReg); + SinkMBB.insertAfter(InsertPt, NewDbgMI); + } + } else { + // Fold instruction into the addressing mode of a memory instruction. + New = TII->emitLdStWithAddr(*SinkDst, MaybeAM); + } + LLVM_DEBUG(dbgs() << "yielding"; New->dump();); + SinkDst->eraseFromParent(); + } + + MI.eraseFromParent(); + + // Collect instructions that need to be deleted (COPYs). We cannot delete them + // while traversing register uses. + SmallVector CleanupInstrs; + Worklist.push_back(DefReg); + while (!Worklist.empty()) { + Register Reg = Worklist.pop_back_val(); + + for (MachineOperand &MO : MRI->use_operands(Reg)) { + MachineInstr *U = MO.getParent(); + assert((U->isCopy() || U->isDebugInstr()) && + "Only debug uses and copies must remain"); + if (U->isCopy()) { + Worklist.push_back(U->getOperand(0).getReg()); + CleanupInstrs.push_back(U); + } else { + MO.setReg(0); + MO.setSubReg(0); + } + } + } + + // Delete the dead COPYs. + for (MachineInstr *Del : CleanupInstrs) + Del->eraseFromParent(); + + return true; +} + /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. 
If any use is in the /// definition block, then return false since it is never legal to move def @@ -461,8 +692,9 @@ LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n"); - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); @@ -471,6 +703,8 @@ MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + TargetPassConfig *PassConfig = &getAnalysis(); + EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); bool EverMadeChange = false; @@ -547,8 +781,8 @@ } bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { - // Can't sink anything out of a block that has less than two successors. - if (MBB.succ_size() <= 1 || MBB.empty()) return false; + if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty()) + return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an @@ -579,8 +813,16 @@ continue; } - bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); - if (Joined) { + if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) { + MadeChange = true; + continue; + } + + // Can't sink anything out of a block that has less than two successors. + if (MBB.succ_size() <= 1) + continue; + + if (PerformTrivialForwardCoalescing(MI, &MBB)) { MadeChange = true; continue; } @@ -597,7 +839,6 @@ SeenDbgVars.clear(); // recalculate the bb register pressure after sinking one BB. CachedRegisterPressure.clear(); - return MadeChange; } @@ -737,7 +978,7 @@ } std::vector & -MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { +MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { // Currently to save compiling time, MBB's register pressure will not change // in one ProcessBlock iteration because of CachedRegisterPressure. 
but MBB's // register pressure is changed after sinking any instructions into it. @@ -753,10 +994,10 @@ RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(), /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - for (MachineBasicBlock::iterator MII = MBB.instr_end(), - MIE = MBB.instr_begin(); + for (MachineBasicBlock::const_iterator MII = MBB.instr_end(), + MIE = MBB.instr_begin(); MII != MIE; --MII) { - MachineInstr &MI = *std::prev(MII); + const MachineInstr &MI = *std::prev(MII); if (MI.isDebugInstr() || MI.isPseudoProbe()) continue; RegisterOperands RegOpers; @@ -772,6 +1013,19 @@ return It.first->second; } +bool MachineSinking::registerPressureSetExceedsLimit( + unsigned NRegs, const TargetRegisterClass *RC, + const MachineBasicBlock &MBB) { + unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight; + const int *PS = TRI->getRegClassPressureSets(RC); + std::vector BBRegisterPressure = getBBRegisterPressure(MBB); + for (; *PS != -1; PS++) + if (Weight + BBRegisterPressure[*PS] >= + TRI->getRegPressureSetLimit(*MBB.getParent(), *PS)) + return true; + return false; +} + /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -816,21 +1070,6 @@ if (!MCycle) return false; - auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { - unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; - const int *PS = TRI->getRegClassPressureSets(RC); - // Get register pressure for block SuccToSinkTo. - std::vector BBRegisterPressure = - getBBRegisterPressure(*SuccToSinkTo); - for (; *PS != -1; PS++) - // check if any register pressure set exceeds limit in block SuccToSinkTo - // after sinking. 
- if (Weight + BBRegisterPressure[*PS] >= - TRI->getRegPressureSetLimit(*MBB->getParent(), *PS)) - return true; - return false; - }; - // If this instruction is inside a Cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { @@ -870,7 +1109,8 @@ // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. - if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { + if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg), + *SuccToSinkTo)) { LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable."); return false; } Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15543,25 +15543,8 @@ NumBytes = 0; } - if (!AM.Scale) { - int64_t Offset = AM.BaseOffs; - - // 9-bit signed offset - if (isInt<9>(Offset)) - return true; - - // 12-bit unsigned offset - unsigned shift = Log2_64(NumBytes); - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && - // Must be a multiple of NumBytes (NumBytes is a power of 2) - (Offset >> shift) << shift == Offset) - return true; - return false; - } - - // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 - - return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes); + return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs, + AM.Scale); } bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const { Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -140,6 +140,13 @@ getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo 
*TRI) const override; + bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const override; + + MachineInstr *emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const override; + bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, @@ -340,6 +347,13 @@ static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized); + + // Return true if address of the form BaseReg + Scale * ScaledReg + Offset can + // be used for a load/store of NumBytes. BaseReg is always present and + // implicit. + bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, + unsigned Scale) const; + #define GET_INSTRINFO_HELPER_DECLS #include "AArch64GenInstrInfo.inc" Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2602,6 +2602,740 @@ return AM; } +bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, + Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const { + // Filter out instructions into which we cannot fold. 
+ unsigned NumBytes; + int64_t OffsetScale = 1; + switch (MemI.getOpcode()) { + default: + return false; + + case AArch64::LDURQi: + case AArch64::STURQi: + NumBytes = 16; + break; + + case AArch64::LDURDi: + case AArch64::STURDi: + case AArch64::LDURXi: + case AArch64::STURXi: + NumBytes = 8; + break; + + case AArch64::LDURWi: + case AArch64::LDURSWi: + case AArch64::STURWi: + NumBytes = 4; + break; + + case AArch64::LDURHi: + case AArch64::STURHi: + case AArch64::LDURHHi: + case AArch64::STURHHi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + NumBytes = 2; + break; + + case AArch64::LDRBroX: + case AArch64::LDRBBroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSBWroX: + case AArch64::STRBroX: + case AArch64::STRBBroX: + case AArch64::LDURBi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::STURBi: + case AArch64::STURBBi: + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::LDRSBXui: + case AArch64::LDRSBWui: + case AArch64::STRBui: + case AArch64::STRBBui: + NumBytes = 1; + break; + + case AArch64::LDRQroX: + case AArch64::STRQroX: + case AArch64::LDRQui: + case AArch64::STRQui: + NumBytes = 16; + OffsetScale = 16; + break; + + case AArch64::LDRDroX: + case AArch64::STRDroX: + case AArch64::LDRXroX: + case AArch64::STRXroX: + case AArch64::LDRDui: + case AArch64::STRDui: + case AArch64::LDRXui: + case AArch64::STRXui: + NumBytes = 8; + OffsetScale = 8; + break; + + case AArch64::LDRWroX: + case AArch64::LDRSWroX: + case AArch64::STRWroX: + case AArch64::LDRWui: + case AArch64::LDRSWui: + case AArch64::STRWui: + NumBytes = 4; + OffsetScale = 4; + break; + + case AArch64::LDRHroX: + case AArch64::STRHroX: + case AArch64::LDRHHroX: + case AArch64::STRHHroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRHui: + case AArch64::STRHui: + case AArch64::LDRHHui: + case AArch64::STRHHui: + case AArch64::LDRSHXui: + case AArch64::LDRSHWui: + NumBytes = 2; + OffsetScale = 2; + break; 
+ } + + // Check the fold operand is not the loaded/stored value. + const MachineOperand &BaseRegOp = MemI.getOperand(0); + if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg) + return false; + + // Handle memory instructions with a [Reg, Reg] addressing mode. + if (MemI.getOperand(2).isReg()) { + // Bail if the addressing mode already includes extension of the offset + // register. + if (MemI.getOperand(3).getImm()) + return false; + + // Check if we actually have a scaled offset. + if (MemI.getOperand(4).getImm() == 0) + OffsetScale = 1; + + // If the address instruction is folded into the base register, then the + // addressing mode must not have a scale. Then we can swap the base and the + // scaled registers. + if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1) + return false; + + switch (AddrI.getOpcode()) { + default: + return false; + + case AArch64::SBFMXri: + // sxtw Xa, Wm + // ldr Xd, [Xn, Xa, lsl #N] + // -> + // ldr Xd, [Xn, Wm, sxtw #N] + if (AddrI.getOperand(2).getImm() != 0 || + AddrI.getOperand(3).getImm() != 31) + return false; + + AM.BaseReg = MemI.getOperand(1).getReg(); + if (AM.BaseReg == Reg) + AM.BaseReg = MemI.getOperand(2).getReg(); + AM.ScaledReg = AddrI.getOperand(1).getReg(); + AM.Scale = OffsetScale; + AM.Displacement = 0; + AM.Form = ExtAddrMode::Formula::SExtScaledReg; + return true; + + case TargetOpcode::SUBREG_TO_REG: { + // mov Wa, Wm + // ldr Xd, [Xn, Xa, lsl #N] + // -> + // ldr Xd, [Xn, Wm, uxtw #N] + + // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG. 
+ if (AddrI.getOperand(1).getImm() != 0 || + AddrI.getOperand(3).getImm() != AArch64::sub_32) + return false; + + const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo(); + Register OffsetReg = AddrI.getOperand(2).getReg(); + if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg)) + return false; + + const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg); + if (DefMI.getOpcode() != AArch64::ORRWrs || + DefMI.getOperand(1).getReg() != AArch64::WZR || + DefMI.getOperand(3).getImm() != 0) + return false; + + AM.BaseReg = MemI.getOperand(1).getReg(); + if (AM.BaseReg == Reg) + AM.BaseReg = MemI.getOperand(2).getReg(); + AM.ScaledReg = DefMI.getOperand(2).getReg(); + AM.Scale = OffsetScale; + AM.Displacement = 0; + AM.Form = ExtAddrMode::Formula::ZExtScaledReg; + return true; + } + } + } + + // Handle memory instructions with a [Reg, #Imm] addressing mode. + auto canFoldAddSubImmIntoAddrMode = [&](int64_t Offset) -> bool { + Offset += MemI.getOperand(2).getImm() * OffsetScale; + if (!isLegalAddressingMode(NumBytes, Offset, /* Scale */ 0)) + return false; + AM.BaseReg = AddrI.getOperand(1).getReg(); + AM.ScaledReg = 0; + AM.Scale = 0; + AM.Displacement = Offset; + AM.Form = ExtAddrMode::Formula::Basic; + return true; + }; + + auto canFoldAddRegIntoAddrMode = + [&](int64_t Scale, + ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool { + if (MemI.getOperand(2).getImm() != 0) + return false; + if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale)) + return false; + AM.BaseReg = AddrI.getOperand(1).getReg(); + AM.ScaledReg = AddrI.getOperand(2).getReg(); + AM.Scale = Scale; + AM.Displacement = 0; + AM.Form = Form; + return true; + }; + + auto avoidSlowSTRQ = [&](const MachineInstr &MemI) { + unsigned Opcode = MemI.getOpcode(); + if ((Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) && + Subtarget.isSTRQroSlow() && !MemI.getMF()->getFunction().hasMinSize()) + return true; + return false; + }; + + int64_t Offset = 0; + switch 
(AddrI.getOpcode()) { + default: + return false; + + case AArch64::ADDXri: + // add Xa, Xn, #N + // ldr Xd, [Xa, #M] + // -> + // ldr Xd, [Xn, #N'+M] + Offset = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm(); + return canFoldAddSubImmIntoAddrMode(Offset); + + case AArch64::SUBXri: + // sub Xa, Xn, #N + // ldr Xd, [Xa, #M] + // -> + // ldr Xd, [Xn, #N'+M] + Offset = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm(); + return canFoldAddSubImmIntoAddrMode(-Offset); + + case AArch64::ADDXrs: { + // add Xa, Xn, Xm, lsl #N + // ldr Xd, [Xa] + // -> + // ldr Xd, [Xn, Xm, lsl #N] + + // Don't fold the add if the result would be slower, unless optimising for + // size. + if (avoidSlowSTRQ(MemI)) + return false; + int64_t Shift = AddrI.getOperand(3).getImm(); + // Don't allow LSR, ASR, or LSL with shifts other than 0, 1, 2, 3, and 4 + if (Shift > 4) + return false; + // Shift 1 (scale 2) in address is one extra cycle and one extra unit on + // some CPUs. + if (Shift == 1 && !Subtarget.hasLSLFast()) + return false; + return canFoldAddRegIntoAddrMode(1 << Shift); + } + + case AArch64::ADDXrr: + // add Xa, Xn, Xm + // ldr Xd, [Xa] + // -> + // ldr Xd, [Xn, Xm, lsl #0] + + // Don't fold the add if the result would be slower, unless optimising for + // size. + if (avoidSlowSTRQ(MemI)) + return false; + return canFoldAddRegIntoAddrMode(1); + + case AArch64::ADDXrx: + // add Xa, Xn, Wm, {s,u}xtw #N + // ldr Xd, [Xa] + // -> + // ldr Xd, [Xn, Wm, {s,u}xtw #N] + + // Don't fold the add if the result would be slower, unless optimising for + // size. + if (avoidSlowSTRQ(MemI)) + return false; + + // Can fold only sign-/zero-extend of a word. 
+ unsigned Imm = static_cast(AddrI.getOperand(3).getImm()); + AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm); + if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW) + return false; + + return canFoldAddRegIntoAddrMode(1 << AArch64_AM::getArithShiftValue(Imm), + (Extend == AArch64_AM::SXTW) + ? ExtAddrMode::Formula::SExtScaledReg + : ExtAddrMode::Formula::ZExtScaledReg); + } +} + +// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, +// return the opcode of an instruction performing the same operation, but using +// the [Reg, Reg] addressing mode. +static unsigned regOffsetOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Address folding not implemented for instruction"); + + case AArch64::LDURQi: + case AArch64::LDRQui: + return AArch64::LDRQroX; + case AArch64::STURQi: + case AArch64::STRQui: + return AArch64::STRQroX; + case AArch64::LDURDi: + case AArch64::LDRDui: + return AArch64::LDRDroX; + case AArch64::STURDi: + case AArch64::STRDui: + return AArch64::STRDroX; + case AArch64::LDURXi: + case AArch64::LDRXui: + return AArch64::LDRXroX; + case AArch64::STURXi: + case AArch64::STRXui: + return AArch64::STRXroX; + case AArch64::LDURWi: + case AArch64::LDRWui: + return AArch64::LDRWroX; + case AArch64::LDURSWi: + case AArch64::LDRSWui: + return AArch64::LDRSWroX; + case AArch64::STURWi: + case AArch64::STRWui: + return AArch64::STRWroX; + case AArch64::LDURHi: + case AArch64::LDRHui: + return AArch64::LDRHroX; + case AArch64::STURHi: + case AArch64::STRHui: + return AArch64::STRHroX; + case AArch64::LDURHHi: + case AArch64::LDRHHui: + return AArch64::LDRHHroX; + case AArch64::STURHHi: + case AArch64::STRHHui: + return AArch64::STRHHroX; + case AArch64::LDURSHXi: + case AArch64::LDRSHXui: + return AArch64::LDRSHXroX; + case AArch64::LDURSHWi: + case AArch64::LDRSHWui: + return AArch64::LDRSHWroX; + case AArch64::LDURBi: + case AArch64::LDRBui: + return AArch64::LDRBroX; + case 
AArch64::LDURBBi: + case AArch64::LDRBBui: + return AArch64::LDRBBroX; + case AArch64::LDURSBXi: + case AArch64::LDRSBXui: + return AArch64::LDRSBXroX; + case AArch64::LDURSBWi: + case AArch64::LDRSBWui: + return AArch64::LDRSBWroX; + case AArch64::STURBi: + case AArch64::STRBui: + return AArch64::STRBroX; + case AArch64::STURBBi: + case AArch64::STRBBui: + return AArch64::STRBBroX; + } +} + +// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return +// the opcode of an instruction performing the same operation, but using the +// [Reg, #Imm] addressing mode with scaled offset. +unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) { + switch (Opcode) { + default: + llvm_unreachable("Address folding not implemented for instruction"); + + case AArch64::LDURQi: + Scale = 16; + return AArch64::LDRQui; + case AArch64::STURQi: + Scale = 16; + return AArch64::STRQui; + case AArch64::LDURDi: + Scale = 8; + return AArch64::LDRDui; + case AArch64::STURDi: + Scale = 8; + return AArch64::STRDui; + case AArch64::LDURXi: + Scale = 8; + return AArch64::LDRXui; + case AArch64::STURXi: + Scale = 8; + return AArch64::STRXui; + case AArch64::LDURWi: + Scale = 4; + return AArch64::LDRWui; + case AArch64::LDURSWi: + Scale = 4; + return AArch64::LDRSWui; + case AArch64::STURWi: + Scale = 4; + return AArch64::STRWui; + case AArch64::LDURHi: + Scale = 2; + return AArch64::LDRHui; + case AArch64::STURHi: + Scale = 2; + return AArch64::STRHui; + case AArch64::LDURHHi: + Scale = 2; + return AArch64::LDRHHui; + case AArch64::STURHHi: + Scale = 2; + return AArch64::STRHHui; + case AArch64::LDURSHXi: + Scale = 2; + return AArch64::LDRSHXui; + case AArch64::LDURSHWi: + Scale = 2; + return AArch64::LDRSHWui; + case AArch64::LDURBi: + Scale = 1; + return AArch64::LDRBui; + case AArch64::LDURBBi: + Scale = 1; + return AArch64::LDRBBui; + case AArch64::LDURSBXi: + Scale = 1; + return AArch64::LDRSBXui; + case AArch64::LDURSBWi: + Scale = 1; + return AArch64::LDRSBWui; 
+ case AArch64::STURBi: + Scale = 1; + return AArch64::STRBui; + case AArch64::STURBBi: + Scale = 1; + return AArch64::STRBBui; + case AArch64::LDRQui: + case AArch64::STRQui: + Scale = 16; + return Opcode; + case AArch64::LDRDui: + case AArch64::STRDui: + case AArch64::LDRXui: + case AArch64::STRXui: + Scale = 8; + return Opcode; + case AArch64::LDRWui: + case AArch64::LDRSWui: + case AArch64::STRWui: + Scale = 4; + return Opcode; + case AArch64::LDRHui: + case AArch64::STRHui: + case AArch64::LDRHHui: + case AArch64::STRHHui: + case AArch64::LDRSHXui: + case AArch64::LDRSHWui: + Scale = 2; + return Opcode; + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::LDRSBXui: + case AArch64::LDRSBWui: + case AArch64::STRBui: + case AArch64::STRBBui: + Scale = 1; + return Opcode; + } +} + +// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return +// the opcode of an instruction performing the same operation, but using the +// [Reg, #Imm] addressing mode with unscaled offset. 
+unsigned unscaledOffsetOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Address folding not implemented for instruction"); + + case AArch64::LDURQi: + case AArch64::STURQi: + case AArch64::LDURDi: + case AArch64::STURDi: + case AArch64::LDURXi: + case AArch64::STURXi: + case AArch64::LDURWi: + case AArch64::LDURSWi: + case AArch64::STURWi: + case AArch64::LDURHi: + case AArch64::STURHi: + case AArch64::LDURHHi: + case AArch64::STURHHi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::LDURBi: + case AArch64::STURBi: + case AArch64::LDURBBi: + case AArch64::STURBBi: + case AArch64::LDURSBWi: + case AArch64::LDURSBXi: + return Opcode; + case AArch64::LDRQui: + return AArch64::LDURQi; + case AArch64::STRQui: + return AArch64::STURQi; + case AArch64::LDRDui: + return AArch64::LDURDi; + case AArch64::STRDui: + return AArch64::STURDi; + case AArch64::LDRXui: + return AArch64::LDURXi; + case AArch64::STRXui: + return AArch64::STURXi; + case AArch64::LDRWui: + return AArch64::LDURWi; + case AArch64::LDRSWui: + return AArch64::LDURSWi; + case AArch64::STRWui: + return AArch64::STURWi; + case AArch64::LDRHui: + return AArch64::LDURHi; + case AArch64::STRHui: + return AArch64::STURHi; + case AArch64::LDRHHui: + return AArch64::LDURHHi; + case AArch64::STRHHui: + return AArch64::STURHHi; + case AArch64::LDRSHXui: + return AArch64::LDURSHXi; + case AArch64::LDRSHWui: + return AArch64::LDURSHWi; + case AArch64::LDRBBui: + return AArch64::LDURBBi; + case AArch64::LDRBui: + return AArch64::LDURBi; + case AArch64::STRBBui: + return AArch64::STURBBi; + case AArch64::STRBui: + return AArch64::STURBi; + case AArch64::LDRSBWui: + return AArch64::LDURSBWi; + case AArch64::LDRSBXui: + return AArch64::LDURSBXi; + } +} + +// Given the opcode of a memory load/store instruction, return the opcode of an +// instruction performing the same operation, but using +// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the +// offset 
register.
+static unsigned offsetExtendOpcode(unsigned Opcode) {
+  // Each group lists the 64-bit register-offset (`roX`), unscaled-immediate
+  // (`UR*i`) and scaled-immediate (`ui`) opcode that share one `roW` result.
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Address folding not implemented for instruction");
+
+  case AArch64::LDRQroX:
+  case AArch64::LDURQi:
+  case AArch64::LDRQui:
+    return AArch64::LDRQroW;
+  case AArch64::STRQroX:
+  case AArch64::STURQi:
+  case AArch64::STRQui:
+    return AArch64::STRQroW;
+  case AArch64::LDRDroX:
+  case AArch64::LDURDi:
+  case AArch64::LDRDui:
+    return AArch64::LDRDroW;
+  case AArch64::STRDroX:
+  case AArch64::STURDi:
+  case AArch64::STRDui:
+    return AArch64::STRDroW;
+  case AArch64::LDRXroX:
+  case AArch64::LDURXi:
+  case AArch64::LDRXui:
+    return AArch64::LDRXroW;
+  case AArch64::STRXroX:
+  case AArch64::STURXi:
+  case AArch64::STRXui:
+    return AArch64::STRXroW;
+  case AArch64::LDRWroX:
+  case AArch64::LDURWi:
+  case AArch64::LDRWui:
+    return AArch64::LDRWroW;
+  case AArch64::LDRSWroX:
+  case AArch64::LDURSWi:
+  case AArch64::LDRSWui:
+    return AArch64::LDRSWroW;
+  case AArch64::STRWroX:
+  case AArch64::STURWi:
+  case AArch64::STRWui:
+    return AArch64::STRWroW;
+  case AArch64::LDRHroX:
+  case AArch64::LDURHi:
+  case AArch64::LDRHui:
+    return AArch64::LDRHroW;
+  case AArch64::STRHroX:
+  case AArch64::STURHi:
+  case AArch64::STRHui:
+    return AArch64::STRHroW;
+  case AArch64::LDRHHroX:
+  case AArch64::LDURHHi:
+  case AArch64::LDRHHui:
+    return AArch64::LDRHHroW;
+  case AArch64::STRHHroX:
+  case AArch64::STURHHi:
+  case AArch64::STRHHui:
+    return AArch64::STRHHroW;
+  case AArch64::LDRSHXroX:
+  case AArch64::LDURSHXi:
+  case AArch64::LDRSHXui:
+    return AArch64::LDRSHXroW;
+  case AArch64::LDRSHWroX:
+  case AArch64::LDURSHWi:
+  case AArch64::LDRSHWui:
+    return AArch64::LDRSHWroW;
+  case AArch64::LDRBroX:
+  case AArch64::LDURBi:
+  case AArch64::LDRBui:
+    return AArch64::LDRBroW;
+  case AArch64::LDRBBroX:
+  case AArch64::LDURBBi:
+  case AArch64::LDRBBui:
+    return AArch64::LDRBBroW;
+  case AArch64::LDRSBXroX:
+  case AArch64::LDURSBXi:
+  case AArch64::LDRSBXui:
+    return AArch64::LDRSBXroW;
+  case AArch64::LDRSBWroX:
+  case AArch64::LDURSBWi:
+  case AArch64::LDRSBWui:
+    return AArch64::LDRSBWroW;
+  case AArch64::STRBroX:
+  case AArch64::STURBi:
+  case AArch64::STRBui:
+    return AArch64::STRBroW;
+  case AArch64::STRBBroX:
+  case AArch64::STURBBi:
+  case AArch64::STRBBui:
+    return AArch64::STRBBroW;
+  }
+}
+
+// Build a load/store that has the same value register (operand 0), memory
+// operands and MI flags as `MemI`, but addresses memory as described by `AM`.
+// The new instruction, and any COPY needed to put the address registers into
+// a suitable register class, is inserted immediately before `MemI`; `MemI`
+// itself is not erased here. Returns the newly created instruction.
+//
+// Handled forms:
+//   * Basic with a scaled register -> register-offset opcode
+//   * Basic without a scaled register -> unscaled or scaled immediate opcode
+//   * SExtScaledReg / ZExtScaledReg -> 32-bit extended register-offset opcode
+// Any other form is a caller bug.
+MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+                                                 const ExtAddrMode &AM) const {
+
+  const DebugLoc &DL = MemI.getDebugLoc();
+  MachineBasicBlock &MBB = *MemI.getParent();
+  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+
+  if (AM.Form == ExtAddrMode::Formula::Basic) {
+    if (AM.ScaledReg) {
+      // The instruction built below has no operand that could carry
+      // AM.Displacement, so a non-zero value would be silently dropped.
+      assert(!AM.Displacement &&
+             "Address offset can be a register or an immediate, but not both");
+      // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
+      unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
+      // Copy the base register to the correct register class.
+      Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+      BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), BaseReg)
+          .addReg(AM.BaseReg);
+      // The two immediates occupy the same slots as in the extended-register
+      // form below: the first is always 0 here, the second is set when the
+      // index register is scaled.
+      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                   .addReg(MemI.getOperand(0).getReg(),
+                           MemI.mayLoad() ? RegState::Define : 0)
+                   .addReg(BaseReg)
+                   .addReg(AM.ScaledReg)
+                   .addImm(0)
+                   .addImm(AM.Scale > 1)
+                   .setMemRefs(MemI.memoperands())
+                   .setMIFlags(MemI.getFlags());
+      return B.getInstr();
+    }
+
+    assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+           "Addressing mode not supported for folding");
+
+    // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
+    // Displacements that fit a signed 9-bit immediate use the unscaled
+    // opcode; otherwise scaledOffsetOpcode picks the opcode and reports the
+    // factor the displacement has to be divided by.
+    unsigned Scale = 1;
+    unsigned Opcode = MemI.getOpcode();
+    if (isInt<9>(AM.Displacement))
+      Opcode = unscaledOffsetOpcode(Opcode);
+    else
+      Opcode = scaledOffsetOpcode(Opcode, Scale);
+
+    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                 .addReg(MemI.getOperand(0).getReg(),
+                         MemI.mayLoad() ? RegState::Define : 0)
+                 .addReg(AM.BaseReg)
+                 .addImm(AM.Displacement / Scale)
+                 .setMemRefs(MemI.memoperands())
+                 .setMIFlags(MemI.getFlags());
+    return B.getInstr();
+  }
+
+  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
+      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
+    // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
+    assert(AM.ScaledReg && !AM.Displacement &&
+           "Address offset can be a register or an immediate, but not both");
+    unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
+    // Copy the base register to the correct register class.
+    Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+    BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), BaseReg).addReg(AM.BaseReg);
+    // Copy the offset register to the correct register class.
+    Register OffsetReg = AM.ScaledReg;
+    const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
+    if (RC != &AArch64::GPR32RegClass) {
+      OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+      // A 64-bit source is narrowed through its sub_32 subregister; any other
+      // class is copied as a whole.
+      if (RC->hasSuperClassEq(&AArch64::GPR64RegClass))
+        BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+            .addReg(AM.ScaledReg, 0, AArch64::sub_32);
+      else
+        BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+            .addReg(AM.ScaledReg);
+    }
+    // NOTE(review): the Basic path above encodes the last immediate as
+    // `Scale > 1`, this one as `Scale != 1`; they agree only for Scale >= 1 —
+    // confirm that is guaranteed by the producer of `AM`.
+    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                 .addReg(MemI.getOperand(0).getReg(),
+                         MemI.mayLoad() ? RegState::Define : 0)
+                 .addReg(BaseReg)
+                 .addReg(OffsetReg)
+                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
+                 .addImm(AM.Scale != 1)
+                 .setMemRefs(MemI.memoperands())
+                 .setMIFlags(MemI.getFlags());
+
+    return B.getInstr();
+  }
+
+  llvm_unreachable(
+      "Function must not be called with an addressing mode it can't handle");
+}
+
 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
     bool &OffsetIsScalable, unsigned &Width,
@@ -8387,6 +9121,30 @@
   return OptLevel >= CodeGenOpt::Aggressive ?
6 : 2;
 }
 
+// Return true if an access of `NumBytes` bytes can use an address of the form
+// `base + Offset` (when `Scale` is 0) or `base + Scale * index` (when `Offset`
+// is 0). An immediate offset and a scaled index cannot be combined.
+bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
+                                             unsigned Scale) const {
+  if (Offset && Scale)
+    return false;
+
+  // Check Reg + Imm
+  if (!Scale) {
+    // 9-bit signed offset
+    if (isInt<9>(Offset))
+      return true;
+
+    // 12-bit unsigned offset, counted in units of NumBytes. Reject a zero
+    // access size and non-positive offsets first, so that neither the
+    // division nor Log2_64 below is ever evaluated with NumBytes == 0.
+    if (!NumBytes || Offset <= 0)
+      return false;
+    if ((Offset / NumBytes) > (1LL << 12) - 1)
+      return false;
+    // Must be a multiple of NumBytes (NumBytes is a power of 2)
+    unsigned Shift = Log2_64(NumBytes);
+    return (Offset >> Shift) << Shift == Offset;
+  }
+
+  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
+  // Scale is unsigned and known to be non-zero at this point.
+  return Scale == 1 || Scale == NumBytes;
+}
+
 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
     return AArch64::BLRNoIP;
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -197,6 +197,11 @@
     cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
     cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+    EnableSinkFold("aarch64-enable-sink-fold",
+                   cl::desc("Enable sinking and folding of instruction copies"),
+                   cl::init(true), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   // Register the target.
RegisterTargetMachine X(getTheAArch64leTarget()); @@ -473,6 +478,7 @@ : TargetPassConfig(TM, PM) { if (TM.getOptLevel() != CodeGenOpt::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + setEnableSinkAndFold(EnableSinkFold); } AArch64TargetMachine &getAArch64TargetMachine() const { Index: llvm/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.cpp +++ llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3843,7 +3843,7 @@ return true; ExtAddrMode AM = *AMOrNone; - + assert(AM.Form == ExtAddrMode::Formula::Basic); if (AM.ScaledReg != X86::NoRegister) { switch (AM.Scale) { case 1: Index: llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll =================================================================== --- llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll +++ llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -o - | FileCheck %s -; RUN: llc -mattr=+lsl-fast < %s -o - | FileCheck %s -check-prefix=LSLFAST +; RUN: llc < %s | FileCheck %s +; RUN: llc -mattr=+lsl-fast --aarch64-enable-sink-fold=false < %s | FileCheck %s -check-prefix=LSLFAST target triple = "aarch64-linux" declare void @g(...) 
Index: llvm/test/CodeGen/AArch64/align-down.ll =================================================================== --- llvm/test/CodeGen/AArch64/align-down.ll +++ llvm/test/CodeGen/AArch64/align-down.ll @@ -55,9 +55,9 @@ ; CHECK-LABEL: t3_extrause0: ; CHECK: // %bb.0: ; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: sub w9, w1, #1 ; CHECK-NEXT: and w0, w0, w8 -; CHECK-NEXT: sub w8, w1, #1 -; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret %mask = add i32 %alignment, -1 store i32 %mask, i32* %mask_storage Index: llvm/test/CodeGen/AArch64/and-mask-removal.ll =================================================================== --- llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -10,21 +10,20 @@ define void @new_position(i32 %pos) { ; CHECK-SD-LABEL: new_position: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: adrp x9, _board@GOTPAGE +; CHECK-SD-NEXT: adrp x8, _board@GOTPAGE ; CHECK-SD-NEXT: ; kill: def $w0 killed $w0 def $x0 -; CHECK-SD-NEXT: sxtw x8, w0 -; CHECK-SD-NEXT: ldr x9, [x9, _board@GOTPAGEOFF] -; CHECK-SD-NEXT: ldrb w9, [x9, x8] -; CHECK-SD-NEXT: sub w9, w9, #1 -; CHECK-SD-NEXT: cmp w9, #1 +; CHECK-SD-NEXT: ldr x8, [x8, _board@GOTPAGEOFF] +; CHECK-SD-NEXT: ldrb w8, [x8, w0, sxtw] +; CHECK-SD-NEXT: sub w8, w8, #1 +; CHECK-SD-NEXT: cmp w8, #1 ; CHECK-SD-NEXT: b.hi LBB0_2 ; CHECK-SD-NEXT: ; %bb.1: ; %if.then -; CHECK-SD-NEXT: adrp x9, _next_string@GOTPAGE -; CHECK-SD-NEXT: adrp x10, _string_number@GOTPAGE -; CHECK-SD-NEXT: ldr x9, [x9, _next_string@GOTPAGEOFF] -; CHECK-SD-NEXT: ldr w9, [x9] -; CHECK-SD-NEXT: ldr x10, [x10, _string_number@GOTPAGEOFF] -; CHECK-SD-NEXT: str w9, [x10, x8, lsl #2] +; CHECK-SD-NEXT: adrp x8, _next_string@GOTPAGE +; CHECK-SD-NEXT: adrp x9, _string_number@GOTPAGE +; CHECK-SD-NEXT: ldr x8, [x8, _next_string@GOTPAGEOFF] +; CHECK-SD-NEXT: ldr w8, [x8] +; CHECK-SD-NEXT: ldr x9, [x9, _string_number@GOTPAGEOFF] +; CHECK-SD-NEXT: str w8, [x9, w0, sxtw #2] ; CHECK-SD-NEXT: 
LBB0_2: ; %if.end ; CHECK-SD-NEXT: ret ; @@ -270,15 +269,15 @@ define zeroext i1 @test16_0(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_0: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #5086 +; CHECK-SD-NEXT: mov w8, #5086 ; =0x13de ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_0: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #18547 -; CHECK-GI-NEXT: mov w9, #23633 +; CHECK-GI-NEXT: mov w8, #18547 ; =0x4873 +; CHECK-GI-NEXT: mov w9, #23633 ; =0x5c51 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -296,8 +295,8 @@ define zeroext i1 @test16_2(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_2: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #16882 -; CHECK-SD-NEXT: mov w9, #40700 +; CHECK-SD-NEXT: mov w8, #16882 ; =0x41f2 +; CHECK-SD-NEXT: mov w9, #40700 ; =0x9efc ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, hi @@ -305,8 +304,8 @@ ; ; CHECK-GI-LABEL: test16_2: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #16882 -; CHECK-GI-NEXT: mov w9, #40699 +; CHECK-GI-NEXT: mov w8, #16882 ; =0x41f2 +; CHECK-GI-NEXT: mov w9, #40699 ; =0x9efb ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, hs @@ -324,15 +323,15 @@ define zeroext i1 @test16_3(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_3: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #53200 +; CHECK-SD-NEXT: mov w8, #53200 ; =0xcfd0 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_3: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #29283 -; CHECK-GI-NEXT: mov w9, #16947 +; CHECK-GI-NEXT: mov w8, #29283 ; =0x7263 +; CHECK-GI-NEXT: mov w9, #16947 ; =0x4233 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -350,8 +349,8 @@ define zeroext i1 @test16_4(i16 zeroext %x) 
align 2 { ; CHECK-SD-LABEL: test16_4: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #29985 -; CHECK-SD-NEXT: mov w9, #15676 +; CHECK-SD-NEXT: mov w8, #29985 ; =0x7521 +; CHECK-SD-NEXT: mov w9, #15676 ; =0x3d3c ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, lo @@ -359,8 +358,8 @@ ; ; CHECK-GI-LABEL: test16_4: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #29985 -; CHECK-GI-NEXT: mov w9, #15677 +; CHECK-GI-NEXT: mov w8, #29985 ; =0x7521 +; CHECK-GI-NEXT: mov w9, #15677 ; =0x3d3d ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ls @@ -378,15 +377,15 @@ define zeroext i1 @test16_5(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_5: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #23282 +; CHECK-SD-NEXT: mov w8, #23282 ; =0x5af2 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_5: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #-25214 -; CHECK-GI-NEXT: mov w9, #63604 +; CHECK-GI-NEXT: mov w8, #-25214 ; =0xffff9d82 +; CHECK-GI-NEXT: mov w9, #63604 ; =0xf874 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -404,8 +403,8 @@ define zeroext i1 @test16_6(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_6: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #-32194 -; CHECK-SD-NEXT: mov w9, #24320 +; CHECK-SD-NEXT: mov w8, #-32194 ; =0xffff823e +; CHECK-SD-NEXT: mov w9, #24320 ; =0x5f00 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w8, w9 ; CHECK-SD-NEXT: cset w0, hi @@ -413,8 +412,8 @@ ; ; CHECK-GI-LABEL: test16_6: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #-32194 -; CHECK-GI-NEXT: mov w9, #24321 +; CHECK-GI-NEXT: mov w8, #-32194 ; =0xffff823e +; CHECK-GI-NEXT: mov w9, #24321 ; =0x5f01 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w8, w9 ; CHECK-GI-NEXT: cset w0, hs @@ -432,8 +431,8 @@ define zeroext i1 
@test16_7(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_7: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #9272 -; CHECK-SD-NEXT: mov w9, #22619 +; CHECK-SD-NEXT: mov w8, #9272 ; =0x2438 +; CHECK-SD-NEXT: mov w9, #22619 ; =0x585b ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, lo @@ -441,8 +440,8 @@ ; ; CHECK-GI-LABEL: test16_7: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #9272 -; CHECK-GI-NEXT: mov w9, #22620 +; CHECK-GI-NEXT: mov w8, #9272 ; =0x2438 +; CHECK-GI-NEXT: mov w9, #22620 ; =0x585c ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ls @@ -460,7 +459,7 @@ define zeroext i1 @test16_8(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_8: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #4919 +; CHECK-SD-NEXT: mov w8, #4919 ; =0x1337 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret @@ -468,7 +467,7 @@ ; CHECK-GI-LABEL: test16_8: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: add w8, w0, #1787 -; CHECK-GI-NEXT: mov w9, #6706 +; CHECK-GI-NEXT: mov w9, #6706 ; =0x1a32 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne ; CHECK-GI-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-ldp.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-ldp.ll +++ llvm/test/CodeGen/AArch64/arm64-ldp.ll @@ -422,10 +422,8 @@ ; CHECK-LABEL: ldp_sext_int_post: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldpsw x19, x20, [x0] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ldpsw x19, x20, [x0], #8 ; CHECK-NEXT: bl "use-ptr" ; CHECK-NEXT: add x0, x20, x19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload Index: llvm/test/CodeGen/AArch64/arm64-long-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-long-shift.ll +++ llvm/test/CodeGen/AArch64/arm64-long-shift.ll @@ -26,8 +26,8 @@ ; CHECK-NEXT: and x10, x2, #0x3f ; CHECK-NEXT: eor x10, x10, #0x3f ; CHECK-NEXT: lsr x9, x9, x10 -; CHECK-NEXT: orr x1, x8, x9 ; CHECK-NEXT: lsl x0, x0, x2 +; CHECK-NEXT: orr x1, x8, x9 ; CHECK-NEXT: ret %mask = and i128 %s, 63 %shl = shl i128 %r, %mask @@ -60,8 +60,8 @@ ; CHECK-NEXT: and x10, x2, #0x3f ; CHECK-NEXT: eor x10, x10, #0x3f ; CHECK-NEXT: lsl x9, x9, x10 -; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: asr x1, x1, x2 +; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret %mask = and i128 %s, 63 %shr = ashr i128 %r, %mask @@ -93,8 +93,8 @@ ; CHECK-NEXT: and x10, x2, #0x3f ; CHECK-NEXT: eor x10, x10, #0x3f ; CHECK-NEXT: lsl x9, x9, x10 -; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: lsr x1, x1, x2 +; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret %mask = and i128 %s, 63 %shr = lshr i128 %r, %mask Index: llvm/test/CodeGen/AArch64/arm64-stp.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-stp.ll +++ llvm/test/CodeGen/AArch64/arm64-stp.ll @@ -182,9 +182,8 @@ ; CHECK-LABEL: stp_int_rar_hazard: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x2, #8] -; CHECK-NEXT: add w8, w8, w1 ; CHECK-NEXT: stp w0, w1, [x2] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add w0, w8, w1 ; CHECK-NEXT: ret store i32 %a, ptr %p, align 4 %ld.ptr = getelementptr inbounds i32, ptr %p, i64 2 @@ -200,8 +199,8 @@ ; CHECK-LABEL: stp_int_rar_hazard_after: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr 
w8, [x3, #4] -; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: stp w1, w2, [x3] +; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret store i32 %a, ptr %p, align 4 %ld.ptr = getelementptr inbounds i32, ptr %p, i64 1 Index: llvm/test/CodeGen/AArch64/arm64-xaluo.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -102,7 +102,7 @@ define zeroext i1 @saddo4.i32(i32 %v1, ptr %res) { ; SDAG-LABEL: saddo4.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: mov w8, #16777215 +; SDAG-NEXT: mov w8, #16777215 // =0xffffff ; SDAG-NEXT: adds w8, w0, w8 ; SDAG-NEXT: cset w0, vs ; SDAG-NEXT: str w8, [x1] @@ -110,7 +110,7 @@ ; ; FAST-LABEL: saddo4.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: mov w8, #16777215 +; FAST-NEXT: mov w8, #16777215 // =0xffffff ; FAST-NEXT: adds w8, w0, w8 ; FAST-NEXT: cset w9, vs ; FAST-NEXT: and w0, w9, #0x1 @@ -119,7 +119,7 @@ ; ; GISEL-LABEL: saddo4.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mov w8, #16777215 +; GISEL-NEXT: mov w8, #16777215 // =0xffffff ; GISEL-NEXT: adds w8, w0, w8 ; GISEL-NEXT: cset w0, vs ; GISEL-NEXT: str w8, [x1] @@ -615,12 +615,11 @@ ; FAST-LABEL: umulo.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x8, x0, x1 -; FAST-NEXT: mul x9, x0, x1 ; FAST-NEXT: cmp xzr, x8 -; FAST-NEXT: cset w8, ne -; FAST-NEXT: and w8, w8, #0x1 -; FAST-NEXT: mov w0, w8 -; FAST-NEXT: str x9, [x2] +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x2] ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.i64: @@ -1327,7 +1326,7 @@ ; SDAG-LABEL: uaddo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, uxtb ; SDAG-NEXT: tst w8, #0x100 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1336,7 +1335,7 @@ ; FAST-LABEL: uaddo.selectboth.i8: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xff -; FAST-NEXT: mov 
w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, uxtb ; FAST-NEXT: tst w8, #0x100 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1345,7 +1344,7 @@ ; GISEL-LABEL: uaddo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w1, #0xff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, uxtb ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1362,7 +1361,7 @@ ; SDAG-LABEL: saddo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: sxtb w8, w0 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, sxtb ; SDAG-NEXT: cmp w8, w8, sxtb ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1371,7 +1370,7 @@ ; FAST-LABEL: saddo.selectboth.i8: ; FAST: // %bb.0: // %entry ; FAST-NEXT: sxtb w8, w0 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, sxtb ; FAST-NEXT: cmp w8, w8, sxtb ; FAST-NEXT: csel w0, w8, w9, ne @@ -1380,7 +1379,7 @@ ; GISEL-LABEL: saddo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: sxtb w8, w1 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, sxtb ; GISEL-NEXT: cmp w8, w8, sxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1397,7 +1396,7 @@ ; SDAG-LABEL: uaddo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, uxth ; SDAG-NEXT: tst w8, #0x10000 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1406,7 +1405,7 @@ ; FAST-LABEL: uaddo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, uxth ; FAST-NEXT: tst w8, #0x10000 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1415,7 +1414,7 @@ ; GISEL-LABEL: uaddo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w1, #0xffff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, uxth ; 
GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1432,7 +1431,7 @@ ; SDAG-LABEL: saddo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: sxth w8, w0 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, sxth ; SDAG-NEXT: cmp w8, w8, sxth ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1441,7 +1440,7 @@ ; FAST-LABEL: saddo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: sxth w8, w0 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, sxth ; FAST-NEXT: cmp w8, w8, sxth ; FAST-NEXT: csel w0, w8, w9, ne @@ -1450,7 +1449,7 @@ ; GISEL-LABEL: saddo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: sxth w8, w1 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, sxth ; GISEL-NEXT: cmp w8, w8, sxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1467,21 +1466,21 @@ ; SDAG-LABEL: uaddo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, hs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1498,21 +1497,21 @@ ; SDAG-LABEL: saddo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, vs ; FAST-NEXT: ret ; ; 
GISEL-LABEL: saddo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1529,21 +1528,21 @@ ; SDAG-LABEL: uaddo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, hs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1560,21 +1559,21 @@ ; SDAG-LABEL: saddo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1591,7 +1590,7 @@ ; SDAG-LABEL: usubo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: sub w8, w8, w1, uxtb ; SDAG-NEXT: tst w8, #0xffffff00 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1600,7 +1599,7 @@ ; FAST-LABEL: usubo.selectboth.i8: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, 
#0xff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: sub w8, w8, w1, uxtb ; FAST-NEXT: tst w8, #0xffffff00 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1609,7 +1608,7 @@ ; GISEL-LABEL: usubo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w0, #0xff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: sub w8, w8, w1, uxtb ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1626,7 +1625,7 @@ ; CHECK-LABEL: ssubo.selectboth.i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w9, #10 // =0xa ; CHECK-NEXT: sub w8, w8, w1, sxtb ; CHECK-NEXT: cmp w8, w8, sxtb ; CHECK-NEXT: csel w0, w8, w9, ne @@ -1643,7 +1642,7 @@ ; SDAG-LABEL: usubo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: sub w8, w8, w1, uxth ; SDAG-NEXT: tst w8, #0xffff0000 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1652,7 +1651,7 @@ ; FAST-LABEL: usubo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: sub w8, w8, w1, uxth ; FAST-NEXT: tst w8, #0xffff0000 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1661,7 +1660,7 @@ ; GISEL-LABEL: usubo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: sub w8, w8, w1, uxth ; GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1678,7 +1677,7 @@ ; CHECK-LABEL: ssubo.selectboth.i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w9, #10 // =0xa ; CHECK-NEXT: sub w8, w8, w1, sxth ; CHECK-NEXT: cmp w8, w8, sxth ; CHECK-NEXT: csel w0, w8, w9, ne @@ -1695,21 +1694,21 @@ ; SDAG-LABEL: usubo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: 
mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, lo ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1726,21 +1725,21 @@ ; SDAG-LABEL: ssubo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: ssubo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1757,21 +1756,21 @@ ; SDAG-LABEL: usubo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, lo ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1788,21 +1787,21 @@ ; SDAG-LABEL: ssubo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs x8, 
x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: ssubo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1822,7 +1821,7 @@ ; SDAG-NEXT: and w8, w1, #0xff ; SDAG-NEXT: and w9, w0, #0xff ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: tst w8, #0xff00 ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1832,7 +1831,7 @@ ; FAST-NEXT: and w8, w1, #0xff ; FAST-NEXT: and w9, w0, #0xff ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: tst w8, #0xff00 ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1842,7 +1841,7 @@ ; GISEL-NEXT: and w8, w0, #0xff ; GISEL-NEXT: and w9, w1, #0xff ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1860,7 +1859,7 @@ ; SDAG-NEXT: sxtb w8, w1 ; SDAG-NEXT: sxtb w9, w0 ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: cmp w8, w8, sxtb ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1870,7 +1869,7 @@ ; FAST-NEXT: sxtb w8, w1 ; FAST-NEXT: sxtb w9, w0 ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: cmp w8, w8, sxtb ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1880,7 +1879,7 @@ ; GISEL-NEXT: sxtb w8, w0 ; GISEL-NEXT: sxtb w9, w1 ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp 
w8, w8, sxtb ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1898,7 +1897,7 @@ ; SDAG-NEXT: and w8, w1, #0xffff ; SDAG-NEXT: and w9, w0, #0xffff ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: tst w8, #0xffff0000 ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1908,7 +1907,7 @@ ; FAST-NEXT: and w8, w1, #0xffff ; FAST-NEXT: and w9, w0, #0xffff ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: tst w8, #0xffff0000 ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1918,7 +1917,7 @@ ; GISEL-NEXT: and w8, w0, #0xffff ; GISEL-NEXT: and w9, w1, #0xffff ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1936,7 +1935,7 @@ ; SDAG-NEXT: sxth w8, w1 ; SDAG-NEXT: sxth w9, w0 ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: cmp w8, w8, sxth ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1946,7 +1945,7 @@ ; FAST-NEXT: sxth w8, w1 ; FAST-NEXT: sxth w9, w0 ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: cmp w8, w8, sxth ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1956,7 +1955,7 @@ ; GISEL-NEXT: sxth w8, w0 ; GISEL-NEXT: sxth w9, w1 ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, sxth ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1972,7 +1971,7 @@ ; SDAG-LABEL: umulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: tst x9, #0xffffffff00000000 ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -1980,7 +1979,7 @@ ; FAST-LABEL: umulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #10 +; FAST-NEXT: 
mov w8, #10 // =0xa ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -1988,7 +1987,7 @@ ; GISEL-LABEL: umulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: lsr x9, x9, #32 ; GISEL-NEXT: cmp w9, #0 @@ -2006,7 +2005,7 @@ ; SDAG-LABEL: smulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: smull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: cmp x9, w9, sxtw ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -2014,7 +2013,7 @@ ; FAST-LABEL: smulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #10 +; FAST-NEXT: mov w8, #10 // =0xa ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -2022,7 +2021,7 @@ ; GISEL-LABEL: smulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: smull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: asr x9, x9, #32 ; GISEL-NEXT: cmp w9, w10, asr #31 @@ -2040,7 +2039,7 @@ ; SDAG-LABEL: umulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umulh x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: mul x10, x0, x1 ; SDAG-NEXT: cmp xzr, x9 ; SDAG-NEXT: csel x0, x10, x8, ne @@ -2049,7 +2048,7 @@ ; FAST-LABEL: umulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: mul x10, x0, x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: csel x0, x10, x8, ne @@ -2058,7 +2057,7 @@ ; GISEL-LABEL: umulo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umulh x9, x0, x1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul x10, x0, x1 ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: csel x0, x10, x8, ne @@ -2075,7 +2074,7 @@ ; SDAG-LABEL: 
smulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: mul x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: smulh x10, x0, x1 ; SDAG-NEXT: cmp x10, x9, asr #63 ; SDAG-NEXT: csel x0, x9, x8, ne @@ -2084,7 +2083,7 @@ ; FAST-LABEL: smulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: csel x0, x9, x8, ne @@ -2093,7 +2092,7 @@ ; GISEL-LABEL: smulo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: mul x9, x0, x1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: smulh x10, x0, x1 ; GISEL-NEXT: cmp x10, x9, asr #63 ; GISEL-NEXT: csel x0, x9, x8, ne @@ -2120,7 +2119,7 @@ ; FAST-LABEL: saddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2155,7 +2154,7 @@ ; FAST-LABEL: saddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2190,7 +2189,7 @@ ; FAST-LABEL: uaddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, hs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2225,7 +2224,7 @@ ; FAST-LABEL: uaddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, hs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2260,7 +2259,7 @@ ; FAST-LABEL: ssubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, 
#0x1 @@ -2295,7 +2294,7 @@ ; FAST-LABEL: ssubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2330,7 +2329,7 @@ ; FAST-LABEL: usubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, lo ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2365,7 +2364,7 @@ ; FAST-LABEL: usubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, lo ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2401,7 +2400,7 @@ ; FAST-LABEL: smulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2442,7 +2441,7 @@ ; FAST-LABEL: smulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: cset w9, ne @@ -2481,7 +2480,7 @@ ; FAST-LABEL: smulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, vs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2517,7 +2516,7 @@ ; FAST-LABEL: umulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2556,7 +2555,7 @@ ; FAST-LABEL: umulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2593,7 
+2592,7 @@ ; FAST-LABEL: umulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, hs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2621,17 +2620,17 @@ define i8 @pr60530() { ; SDAG-LABEL: pr60530: ; SDAG: // %bb.0: -; SDAG-NEXT: mov w0, #-1 +; SDAG-NEXT: mov w0, #-1 // =0xffffffff ; SDAG-NEXT: ret ; ; FAST-LABEL: pr60530: ; FAST: // %bb.0: -; FAST-NEXT: mov w0, #-1 +; FAST-NEXT: mov w0, #-1 // =0xffffffff ; FAST-NEXT: ret ; ; GISEL-LABEL: pr60530: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #1 +; GISEL-NEXT: mov w8, #1 // =0x1 ; GISEL-NEXT: sbfx w0, w8, #0, #1 ; GISEL-NEXT: ret %1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1) Index: llvm/test/CodeGen/AArch64/arm64_32-addrs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32-addrs.ll +++ llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -42,10 +42,9 @@ define i8 @test_valid_wrap_optimizable2(ptr %base, i32 %offset) { ; CHECK-LABEL: test_valid_wrap_optimizable2: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #-100 +; CHECK-NEXT: mov w8, #-100 ; =0xffffff9c ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: ldrb w0, [x9, x8] +; CHECK-NEXT: ldrb w0, [x8, w1, sxtw] ; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, ptr inttoptr(i32 -100 to ptr), i32 %offset Index: llvm/test/CodeGen/AArch64/arm64_32.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32.ll +++ llvm/test/CodeGen/AArch64/arm64_32.ll @@ -731,9 +731,8 @@ define void @test_memset(i64 %in, i8 %value) { ; CHECK-LABEL: test_memset: -; CHECK-DAG: and x8, x0, #0xffffffff -; CHECK-DAG: lsr x2, x0, #32 -; CHECK-DAG: mov x0, x8 +; CHECK: lsr x2, x0, #32 +; CHECK-NEXT: and x0, x0, #0xffffffff ; CHECK: b _memset %ptr.i32 = trunc i64 %in to i32 Index: llvm/test/CodeGen/AArch64/atomic-ops-lse.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/atomic-ops-lse.ll +++ llvm/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1713,9 +1713,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -1735,9 +1735,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -1757,9 +1757,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -1779,9 +1779,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -1965,9 +1965,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -1987,9 +1987,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2629,9 +2629,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2651,9 +2651,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2673,9 +2673,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2695,9 +2695,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2717,9 +2717,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2739,9 +2739,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2761,9 +2761,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2783,9 +2783,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2805,9 +2805,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2827,9 +2827,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2849,9 +2849,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2871,9 +2871,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2893,9 +2893,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2915,9 +2915,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2937,9 +2937,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2959,9 +2959,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -2981,9 +2981,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3003,9 +3003,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3025,9 +3025,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3047,9 +3047,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3069,9 +3069,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3091,9 +3091,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3113,9 +3113,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3135,9 +3135,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3157,9 +3157,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3179,9 +3179,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3201,9 +3201,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3223,9 +3223,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3245,9 +3245,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret @@ -3267,9 +3267,9 @@ ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret Index: llvm/test/CodeGen/AArch64/atomic-ops.ll =================================================================== --- llvm/test/CodeGen/AArch64/atomic-ops.ll +++ llvm/test/CodeGen/AArch64/atomic-ops.ll @@ -245,9 +245,9 @@ ; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i8: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var8 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr1_rel ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -273,9 +273,9 @@ ; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i16: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var16 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr2_relax ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -301,9 +301,9 @@ ; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i32: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var32 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -329,9 +329,9 @@ ; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i64: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: mvn x0, x0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var64 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: mvn x0, x0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -406,10 +406,10 @@ define i32 @test_or_and_and4(i32 %a, i32 %b, ptr %ptr) { ; CHECK-LABEL: test_or_and_and4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xffff000f -; CHECK-NEXT: and w9, w1, #0xfff0 -; CHECK-NEXT: orr w0, w9, w8 -; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: and w8, w1, #0xfff0 +; CHECK-NEXT: and w9, w0, #0xffff000f +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret entry: %and = and i32 %a, -65521 Index: llvm/test/CodeGen/AArch64/cmp-select-sign.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -244,18 +244,18 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 +; CHECK-NEXT: sbfx x10, x3, #0, #1 ; CHECK-NEXT: orr x9, x8, #0x1 ; CHECK-NEXT: lsr x1, x8, #63 ; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: orr x4, x10, #0x1 -; 
CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: sbfx x9, x3, #0, #1 -; CHECK-NEXT: orr x2, x9, #0x1 -; CHECK-NEXT: lsr x3, x9, #63 +; CHECK-NEXT: lsr x3, x10, #63 ; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: orr x2, x10, #0x1 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: sbfx x9, x5, #0, #1 +; CHECK-NEXT: lsr x5, x9, #63 +; CHECK-NEXT: orr x4, x9, #0x1 +; CHECK-NEXT: orr x6, x8, #0x1 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: stlxr w8, w2, [x0] ; CHECK-NEXT: cbnz w8, LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w0, wzr @@ -64,7 +64,7 @@ ; CHECK-NEXT: stlxrb w9, w2, [x0] ; CHECK-NEXT: cbnz w9, LBB1_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_4: ; %cmpxchg.nostore @@ -188,13 +188,13 @@ ; CHECK-NEXT: stlxr w8, w20, [x19] ; CHECK-NEXT: cbnz w8, LBB3_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: b LBB3_5 ; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: clrex ; CHECK-NEXT: LBB3_5: ; %for.cond.preheader -; CHECK-NEXT: mov w22, #2 +; CHECK-NEXT: mov w22, #2 ; =0x2 ; CHECK-NEXT: LBB3_6: ; %for.cond ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cbz w22, LBB3_9 @@ -207,8 +207,7 @@ ; CHECK-NEXT: b.eq LBB3_6 ; CHECK-NEXT: ; %bb.8: ; %if.then ; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1 -; CHECK-NEXT: sxtw x8, w22 -; CHECK-NEXT: str w9, [x19, x8, lsl #2] +; CHECK-NEXT: str w9, [x19, w22, sxtw #2] ; CHECK-NEXT: bl _foo ; 
CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b LBB3_6 @@ -236,7 +235,7 @@ ; OUTLINE-ATOMICS-NEXT: mov w21, w0 ; OUTLINE-ATOMICS-NEXT: bl ___aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: cmp w0, w21 -; OUTLINE-ATOMICS-NEXT: mov w22, #2 +; OUTLINE-ATOMICS-NEXT: mov w22, #2 ; =0x2 ; OUTLINE-ATOMICS-NEXT: cset w8, eq ; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond ; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -250,8 +249,7 @@ ; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1 ; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then ; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1 -; OUTLINE-ATOMICS-NEXT: sxtw x8, w22 -; OUTLINE-ATOMICS-NEXT: str w9, [x19, x8, lsl #2] +; OUTLINE-ATOMICS-NEXT: str w9, [x19, w22, sxtw #2] ; OUTLINE-ATOMICS-NEXT: bl _foo ; OUTLINE-ATOMICS-NEXT: mov w8, wzr ; OUTLINE-ATOMICS-NEXT: b LBB3_1 Index: llvm/test/CodeGen/AArch64/i128-math.ll =================================================================== --- llvm/test/CodeGen/AArch64/i128-math.ll +++ llvm/test/CodeGen/AArch64/i128-math.ll @@ -265,15 +265,15 @@ ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: umulh x8, x1, x2 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: mul x9, x3, x0 -; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x8, x3, x0 +; CHECK-NEXT: umulh x9, x3, x0 ; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x8, x0, x2 +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: madd x8, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: umulh x9, x0, x2 ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: adds x1, x8, x9 +; CHECK-NEXT: adds x1, x9, x8 ; CHECK-NEXT: csinc w8, w10, wzr, lo ; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/loop-sink.mir =================================================================== --- llvm/test/CodeGen/AArch64/loop-sink.mir +++ llvm/test/CodeGen/AArch64/loop-sink.mir @@ -328,28 +328,18 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 4, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[ADDXri]] - ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 8, 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64all = COPY [[ADDXri1]] - ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 12, 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64all = COPY [[ADDXri2]] - ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 16, 0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64all = COPY [[ADDXri3]] - ; CHECK-NEXT: [[ADDXri4:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 20, 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64all = COPY [[ADDXri4]] - ; CHECK-NEXT: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]] ; CHECK-NEXT: [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1..backedge: ; CHECK-NEXT: successors: %bb.9(0x09249249), %bb.2(0x76db6db7) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY7]], %bb.0, %7, %bb.9 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY2]], %bb.0, %7, %bb.9 ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load (s8) from %ir.lsr.iv) ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[LDRBBui]], %subreg.sub_32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32 - ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[COPY8]], 50, 0, implicit-def $nzcv + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[COPY3]], 50, 0, implicit-def $nzcv ; CHECK-NEXT: Bcc 8, %bb.9, implicit $nzcv ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2..backedge: @@ -371,7 
+361,7 @@ ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY2]] + ; CHECK-NEXT: $x0 = nuw ADDXri [[COPY]], 4, 0 ; CHECK-NEXT: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-NEXT: B %bb.9 @@ -380,7 +370,7 @@ ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY3]] + ; CHECK-NEXT: $x0 = nuw ADDXri [[COPY]], 8, 0 ; CHECK-NEXT: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-NEXT: B %bb.9 @@ -389,7 +379,7 @@ ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY4]] + ; CHECK-NEXT: $x0 = nuw ADDXri [[COPY]], 12, 0 ; CHECK-NEXT: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-NEXT: B %bb.9 @@ -398,7 +388,7 @@ ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY5]] + ; CHECK-NEXT: $x0 = nuw ADDXri [[COPY]], 16, 0 ; CHECK-NEXT: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-NEXT: B %bb.9 @@ -407,15 +397,15 @@ ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY 
[[COPY6]] + ; CHECK-NEXT: $x0 = nuw ADDXri [[COPY]], 20, 0 ; CHECK-NEXT: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9..backedge.backedge: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ADDXri6:%[0-9]+]]:gpr64sp = ADDXri [[PHI]], 1, 0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr64all = COPY [[ADDXri6]] + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64sp = ADDXri [[PHI]], 1, 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64all = COPY [[ADDXri1]] ; CHECK-NEXT: B %bb.1 bb.0 (%ir-block.bb): successors: %bb.1(0x80000000) Index: llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll =================================================================== --- llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll +++ llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll @@ -16,8 +16,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -39,8 +39,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -62,8 +62,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -86,7 +86,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w10, w11, [x1] ; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov x9, #-6148914691236517206 +; CHECK-NEXT: mov x9, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa ; CHECK-NEXT: add w0, w10, w11 ; CHECK-NEXT: stp x9, x9, [x8] ; 
CHECK-NEXT: ret @@ -107,8 +107,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* Index: llvm/test/CodeGen/AArch64/nontemporal-load.ll =================================================================== --- llvm/test/CodeGen/AArch64/nontemporal-load.ll +++ llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -528,12 +528,12 @@ ; CHECK-LABEL: test_ldnp_v4i63: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x12, [x0, #16] -; CHECK-NEXT: extr x11, x9, x8, #63 +; CHECK-NEXT: ldp x10, x11, [x0, #16] +; CHECK-NEXT: extr x12, x9, x8, #63 ; CHECK-NEXT: and x0, x8, #0x7fffffffffffffff ; CHECK-NEXT: extr x9, x10, x9, #62 -; CHECK-NEXT: extr x3, x12, x10, #61 -; CHECK-NEXT: and x1, x11, #0x7fffffffffffffff +; CHECK-NEXT: extr x3, x11, x10, #61 +; CHECK-NEXT: and x1, x12, #0x7fffffffffffffff ; CHECK-NEXT: and x2, x9, #0x7fffffffffffffff ; CHECK-NEXT: ret ; Index: llvm/test/CodeGen/AArch64/optimize-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/optimize-imm.ll +++ llvm/test/CodeGen/AArch64/optimize-imm.ll @@ -44,7 +44,7 @@ define i32 @and4(i32 %a) { ; CHECK-LABEL: and4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #61951 +; CHECK-NEXT: mov w8, #61951 // =0xf1ff ; CHECK-NEXT: and w9, w0, #0xfffc07ff ; CHECK-NEXT: movk w8, #65521, lsl #16 ; CHECK-NEXT: orr w0, w9, w8 @@ -61,7 +61,7 @@ define i32 @xor1(i32 %a) { ; CHECK-LABEL: xor1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #56 +; CHECK-NEXT: mov w8, #56 // =0x38 ; CHECK-NEXT: bic w0, w8, w0, lsl #3 ; CHECK-NEXT: ret entry: @@ -78,10 +78,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov w8, #129 +; CHECK-NEXT: mov w9, #8 // =0x8 +; CHECK-NEXT: mov w8, #129 // =0x81 ; 
CHECK-NEXT: eor x0, x0, x8 -; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: str x9, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll =================================================================== --- llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -5,18 +5,17 @@ ; CHECK-LABEL: jsimd_idct_ifast_neon_intrinsic: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr q0, [x1, #32] -; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: ldr q1, [x1, #96] ; CHECK-NEXT: ldr q2, [x0, #32] ; CHECK-NEXT: ldr q3, [x0, #96] -; CHECK-NEXT: ldr x9, [x2, #48] +; CHECK-NEXT: ldr x8, [x2, #48] ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: str q2, [x9, x8] -; CHECK-NEXT: ldr x9, [x2, #56] +; CHECK-NEXT: str q2, [x8, w3, uxtw] +; CHECK-NEXT: ldr x8, [x2, #56] ; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: str q0, [x8, w3, uxtw] ; CHECK-NEXT: ret entry: %add.ptr5 = getelementptr inbounds i16, ptr %coef_block, i64 16 Index: llvm/test/CodeGen/AArch64/rand.ll =================================================================== --- llvm/test/CodeGen/AArch64/rand.ll +++ llvm/test/CodeGen/AArch64/rand.ll @@ -4,12 +4,11 @@ define i32 @rndr(ptr %__addr) { ; CHECK-LABEL: rndr: ; CHECK: // %bb.0: -; CHECK-NEXT: mrs x10, RNDR -; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: cset w8, eq -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] +; CHECK-NEXT: mrs x9, RNDR +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: cset w10, eq +; CHECK-NEXT: and w0, w10, #0x1 +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndr() %2 = extractvalue { i64, i1 } %1, 0 @@ -23,12 +22,11 @@ define i32 @rndrrs(ptr %__addr) { ; CHECK-LABEL: rndrrs: ; CHECK: // %bb.0: -; CHECK-NEXT: mrs x10, RNDRRS -; CHECK-NEXT: 
mov x9, x0 -; CHECK-NEXT: cset w8, eq -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] +; CHECK-NEXT: mrs x9, RNDRRS +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: cset w10, eq +; CHECK-NEXT: and w0, w10, #0x1 +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndrrs() %2 = extractvalue { i64, i1 } %1, 0 Index: llvm/test/CodeGen/AArch64/shrink-constant-multiple-users.ll =================================================================== --- llvm/test/CodeGen/AArch64/shrink-constant-multiple-users.ll +++ llvm/test/CodeGen/AArch64/shrink-constant-multiple-users.ll @@ -3,9 +3,10 @@ ; Check the -8 constant is shrunk if there are multiple users of the AND instruction. ; CHECK-LABEL: _test: -; CHECK: and x0, x0, #0xfffffff8 -; CHECK-NEXT: add x19, x0, #10 +; CHECK: and x19, x0, #0xfffffff8 +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl _user +; CHECK: add x0, x19, #10 define i64 @test(i32 %a) { %ext = zext i32 %a to i64 Index: llvm/test/CodeGen/AArch64/sink-and-fold.ll =================================================================== --- llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -7,16 +7,15 @@ define i32 @f0(i1 %c1, ptr %p) nounwind { ; CHECK-LABEL: f0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: add x0, x1, #8 -; CHECK-NEXT: tbz w8, #0, .LBB0_2 +; CHECK-NEXT: tbz w0, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: add x0, x1, #8 ; CHECK-NEXT: bl use ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %if.else -; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ldur w0, [x1, #8] ; CHECK-NEXT: ret entry: %a = getelementptr i32, ptr %p, i32 2 @@ -38,16 +37,15 @@ define i32 @f1(i1 %c1, ptr %p, i64 %i) nounwind { ; CHECK-LABEL: f1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: add x0, x1, x2 -; CHECK-NEXT: tbz w8, #0, .LBB1_2 +; CHECK-NEXT: tbz w0, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: add x0, x1, x2 ; CHECK-NEXT: bl use ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %if.else -; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ldr w0, [x1, x2] ; CHECK-NEXT: ret entry: %a = getelementptr i8, ptr %p, i64 %i @@ -105,16 +103,15 @@ define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+lsl-fast" { ; CHECK-LABEL: f3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: add x0, x1, x2, lsl #2 -; CHECK-NEXT: tbz w8, #0, .LBB3_2 +; CHECK-NEXT: tbz w0, #0, .LBB3_2 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: add x0, x1, x2, lsl #2 ; CHECK-NEXT: bl use ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_2: // %if.else -; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ldr w0, [x1, x2, lsl #2] ; CHECK-NEXT: ret entry: %a = getelementptr i32, ptr %p, i64 %i @@ -139,29 +136,27 @@ ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: b.lt .LBB4_9 ; CHECK-NEXT: // %bb.1: // %LI.preheader -; CHECK-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: mov x23, xzr -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x23, [sp, #-48]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x22, xzr +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: mov x20, x0 -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: b .LBB4_3 ; CHECK-NEXT: .LBB4_2: // %LI.latch ; CHECK-NEXT: // in Loop: Header=BB4_3 Depth=1 -; CHECK-NEXT: cmp x23, x19 -; CHECK-NEXT: mov x23, x24 +; CHECK-NEXT: cmp x22, x19 +; CHECK-NEXT: mov x22, x23 ; CHECK-NEXT: b.ge .LBB4_8 ; CHECK-NEXT: .LBB4_3: // %LI ; CHECK-NEXT: // =>This Loop Header: Depth=1 ; CHECK-NEXT: // Child Loop BB4_6 Depth 2 ; CHECK-NEXT: mov x21, xzr -; CHECK-NEXT: add x24, x23, #1 -; CHECK-NEXT: add x22, x20, x23, lsl #2 +; CHECK-NEXT: add x23, x22, #1 ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_4: // %if.else ; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 -; CHECK-NEXT: ldr w0, [x22] +; CHECK-NEXT: ldr w0, [x20, x22, lsl #2] ; CHECK-NEXT: .LBB4_5: // %LJ.latch ; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 ; CHECK-NEXT: add x8, x21, #1 @@ -177,15 +172,14 @@ ; CHECK-NEXT: tbz w8, #31, .LBB4_4 ; CHECK-NEXT: // %bb.7: // %if.then ; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 -; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: add x0, x20, x22, lsl #2 ; CHECK-NEXT: mov x1, x21 ; CHECK-NEXT: bl use ; CHECK-NEXT: b .LBB4_5 ; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload ; CHECK-NEXT: .LBB4_9: // %exit ; CHECK-NEXT: ret entry: @@ -238,17 +232,16 @@ ; CHECK-NEXT: // %bb.1: // %L.preheader ; CHECK-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill ; CHECK-NEXT: mov w8, #12 // =0xc -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w1 -; CHECK-NEXT: smaddl x8, w2, w8, x0 +; CHECK-NEXT: smaddl x20, w2, w8, x0 ; CHECK-NEXT: add x21, x0, #8 ; CHECK-NEXT: mov w22, #-1 // =0xffffffff -; CHECK-NEXT: add x20, x8, #4 ; CHECK-NEXT: b .LBB5_4 ; CHECK-NEXT: .LBB5_2: // %if.else ; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 -; CHECK-NEXT: ldr w0, [x20] +; CHECK-NEXT: ldur w0, [x20, #4] ; CHECK-NEXT: .LBB5_3: // %L.latch ; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 ; CHECK-NEXT: add w22, w22, #1 @@ -261,8 +254,8 @@ ; CHECK-NEXT: tbz w8, #31, .LBB5_2 ; CHECK-NEXT: // %bb.5: // %if.then ; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 +; CHECK-NEXT: add x0, x20, #4 ; CHECK-NEXT: add w1, w22, #1 -; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: bl use ; CHECK-NEXT: b .LBB5_3 ; CHECK-NEXT: .LBB5_6: @@ -301,125 +294,3 @@ exit: ret void } - -define i32 @f6(i1 %c, ptr %a, i32 %i) { -; CHECK-LABEL: f6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: sxtw x8, w2 -; CHECK-NEXT: tbz w0, #0, .LBB6_2 -; CHECK-NEXT: // %bb.1: // %if.then -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: str wzr, [x1, x8, lsl #2] -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB6_2: // %if.else -; CHECK-NEXT: ldr w0, [x1, x8, lsl #2] -; CHECK-NEXT: ret -entry: - %j = sext i32 %i to i64 - br i1 %c, label %if.then, label %if.else - -if.then: - %p0 = getelementptr i32, ptr %a, i64 %j - store i32 0, ptr %p0 - br label %exit - -if.else: - %p1 = getelementptr i32, ptr %a, i64 %j - %v0 = load i32, ptr %p1 - br label %exit - -exit: - %v = phi i32 [0, %if.then], [%v0, %if.else] - ret i32 %v -} - -define i8 @f7(i1 %c, ptr %a, i32 %i) { -; CHECK-LABEL: f7: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: tbz w0, #0, .LBB7_2 -; CHECK-NEXT: // 
%bb.1: // %if.then -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: strb wzr, [x1, x8] -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB7_2: // %if.else -; CHECK-NEXT: ldrb w0, [x1, x8] -; CHECK-NEXT: ret -entry: - %j = zext i32 %i to i64 - br i1 %c, label %if.then, label %if.else - -if.then: - %p0 = getelementptr i8, ptr %a, i64 %j - store i8 0, ptr %p0 - br label %exit - -if.else: - %p1 = getelementptr i8, ptr %a, i64 %j - %v0 = load i8, ptr %p1 - br label %exit - -exit: - %v = phi i8 [0, %if.then], [%v0, %if.else] - ret i8 %v -} - -define i32 @f8(i1 %c, ptr %a, i32 %i) { -; CHECK-LABEL: f8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add x8, x1, w2, sxtw #2 -; CHECK-NEXT: tbz w0, #0, .LBB8_2 -; CHECK-NEXT: // %bb.1: // %if.then -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: str wzr, [x8] -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB8_2: // %if.else -; CHECK-NEXT: ldr w0, [x8] -; CHECK-NEXT: ret -entry: - %p = getelementptr i32, ptr %a, i32 %i - br i1 %c, label %if.then, label %if.else - -if.then: - store i32 0, ptr %p - br label %exit - -if.else: - %v0 = load i32, ptr %p - br label %exit - -exit: - %v = phi i32 [0, %if.then], [%v0, %if.else] - ret i32 %v -} - -define i64 @f9(i1 %c, ptr %a, i32 %i) { -; CHECK-LABEL: f9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: tbz w0, #0, .LBB9_2 -; CHECK-NEXT: // %bb.1: // %if.then -; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: str xzr, [x1, x8, lsl #3] -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB9_2: // %if.else -; CHECK-NEXT: ldr x0, [x1, x8, lsl #3] -; CHECK-NEXT: ret -entry: - %j = zext i32 %i to i64 - %p = getelementptr i64, ptr %a, i64 %j - br i1 %c, label %if.then, label %if.else - -if.then: - store i64 0, ptr %p - br label %exit - -if.else: - %v0 = load i64, ptr %p - br label %exit - -exit: - %v = phi i64 [0, %if.then], [%v0, %if.else] - ret i64 %v -} Index: llvm/test/CodeGen/AArch64/sve-ld1r.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ld1r.ll +++ 
llvm/test/CodeGen/AArch64/sve-ld1r.ll @@ -1192,9 +1192,8 @@ ; CHECK-LABEL: avoid_preindex_load: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, x0, #1 ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 @@ -1211,9 +1210,8 @@ define i8* @avoid_preindex_load_dup(i8* %src, %pg, * %out) { ; CHECK-LABEL: avoid_preindex_load_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #1 ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret @@ -1229,9 +1227,8 @@ define i8* @avoid_preindex_load_dup_passthru_zero(i8* %src, %pg, * %out) { ; CHECK-LABEL: avoid_preindex_load_dup_passthru_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #1 ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/swift-async-win.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-async-win.ll +++ llvm/test/CodeGen/AArch64/swift-async-win.ll @@ -22,14 +22,13 @@ ; CHECK-NEXT: stp x30, x29, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #24 ; CHECK-NEXT: str x19, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: sub x8, x29, #8 -; CHECK-NEXT: str xzr, [sp, #16] ; CHECK-NEXT: adrp x19, __imp_swift_task_dealloc -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: str x9, [x8] +; CHECK-NEXT: str xzr, [sp, #16] +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: stur x8, [x29, #-8] ; CHECK-NEXT: ldr x20, [x0] -; CHECK-NEXT: ldp x22, x0, [x9, #16] -; CHECK-NEXT: str x20, [x8] +; CHECK-NEXT: ldp x22, x0, [x8, #16] +; CHECK-NEXT: stur x20, [x29, #-8] ; CHECK-NEXT: ldr x19, [x19, :lo12:__imp_swift_task_dealloc] 
; CHECK-NEXT: blr x19 ; CHECK-NEXT: mov x0, x22 Index: llvm/test/CodeGen/AArch64/swift-async.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-async.ll +++ llvm/test/CodeGen/AArch64/swift-async.ll @@ -120,8 +120,7 @@ define swifttailcc void @write_frame_context(ptr swiftasync %ctx, ptr %newctx) "frame-pointer"="non-leaf" { ; CHECK-LABEL: write_frame_context: -; CHECK: sub x[[ADDR:[0-9]+]], x29, #8 -; CHECK: str x0, [x[[ADDR]]] +; CHECK: stur x0, [x29, #-8] %ptr = call ptr @llvm.swift.async.context.addr() store ptr %newctx, ptr %ptr ret void