diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -678,14 +678,14 @@
   assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
          "Expected promotable zero stores.");
 
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == MergeMI)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -748,18 +748,17 @@
                               const TargetRegisterInfo *TRI, unsigned Limit,
                               std::function<bool(MachineInstr &, bool)> &Fn) {
   auto MBB = MI.getParent();
-  for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
-                                           E = MBB->rend();
-       I != E; I++) {
+  for (MachineInstr &I :
+       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
     if (!Limit)
       return false;
     --Limit;
 
-    bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
       return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
              TRI->regsOverlap(MOP.getReg(), DefReg);
     });
-    if (!Fn(*I, isDef))
+    if (!Fn(I, isDef))
       return false;
     if (isDef)
       break;
@@ -783,14 +782,14 @@
 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator Paired,
                                       const LdStPairFlags &Flags) {
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == Paired)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   int SExtIdx = Flags.getSExtIdx();
   unsigned Opc =
@@ -1009,8 +1008,8 @@
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                           MachineBasicBlock::iterator StoreI) {
-  MachineBasicBlock::iterator NextI = LoadI;
-  ++NextI;
+  MachineBasicBlock::iterator NextI =
+      next_nodbg(LoadI, LoadI->getParent()->end());
 
   int LoadSize = TII->getMemScale(*LoadI);
   int StoreSize = TII->getMemScale(*StoreI);
@@ -1188,7 +1187,7 @@
 
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1440,7 +1439,7 @@
   MachineBasicBlock::iterator MBBI = I;
   MachineBasicBlock::iterator MBBIWithRenameReg;
   MachineInstr &FirstMI = *I;
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   bool MayLoad = FirstMI.mayLoad();
   bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1468,7 +1467,8 @@
   // Remember any instructions that read/write memory between FirstMI and MI.
   SmallVector<MachineInstr *, 4> MemInsns;
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     UsedInBetween.accumulate(MI);
@@ -1637,12 +1637,13 @@
   assert((Update->getOpcode() == AArch64::ADDXri ||
           Update->getOpcode() == AArch64::SUBXri) &&
          "Unexpected base register update instruction to merge!");
-  MachineBasicBlock::iterator NextI = I;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // Return the instruction following the merged instruction, which is
   // the instruction following our unmerged load. Unless that's the add/sub
   // instruction we're merging, in which case it's the one after that.
-  if (++NextI == Update)
-    ++NextI;
+  if (NextI == Update)
+    NextI = next_nodbg(NextI, E);
 
   int Value = Update->getOperand(2).getImm();
   assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1780,7 +1781,7 @@
   // insn (inclusive) and the second insn.
   ModifiedRegUnits.clear();
   UsedRegUnits.clear();
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   // We can't post-increment the stack pointer if any instruction between
   // the memory access (I) and the increment (MBBI) can access the memory
@@ -1796,7 +1797,8 @@
     return E;
   }
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1854,7 +1856,7 @@
   UsedRegUnits.clear();
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
copy from llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
copy to llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
@@ -1,4 +1,8 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# Strip out debug info, then run ldst-opt with limit=1.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1, then strip out debug info.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
 ---
 ### STG and its offset limits
 
@@ -11,7 +15,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -23,7 +31,11 @@
     liveins: $x0, $x1
 
     STGOffset $x1, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -36,7 +48,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 8, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -48,7 +64,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -61,7 +81,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -73,7 +97,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4080, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -86,7 +114,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -100,7 +132,11 @@
     liveins: $x0
 
     STZGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -112,7 +148,11 @@
     liveins: $x0
 
     ST2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -124,7 +164,11 @@
     liveins: $x0
 
     STZ2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -138,7 +182,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -150,7 +198,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -163,7 +215,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1040, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -175,7 +231,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1008, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -188,7 +248,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -202,7 +266,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -214,7 +282,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -228,7 +300,11 @@
     liveins: $x0
 
     $x0 = ADDXri $x0, 32, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -240,7 +316,11 @@
     liveins: $x0, $x1, $x2
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -254,7 +334,11 @@
     liveins: $x0
 
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -266,7 +350,11 @@
     liveins: $x0
 
     STGPi $x0, $x0, $x0, 7
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -280,6 +368,10 @@
     liveins: $x0
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
 ---
 ### STG and its offset limits
 