Index: llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <iterator>
@@ -60,6 +61,8 @@
           "Number of HW prefetch tag collisions avoided");
 STATISTIC(NumCollisionsNotAvoided,
           "Number of HW prefetch tag collisions not avoided due to lack of registers");
+DEBUG_COUNTER(FixCounter, "falkor-hwpf",
+              "Controls which tag collisions are avoided");
 
 namespace {
 
@@ -730,6 +733,21 @@
       bool Fixed = false;
       DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
 
+      if (!DebugCounter::shouldExecute(FixCounter)) {
+        DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI);
+        continue;
+      }
+
+      // Add the non-base registers of MI as live so we don't use them as
+      // scratch registers.
+      for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
+        if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
+          continue;
+        MachineOperand &MO = MI.getOperand(OpI);
+        if (MO.isReg() && MO.readsReg())
+          LR.addReg(MO.getReg());
+      }
+
       for (unsigned ScratchReg : AArch64::GPR64RegClass) {
         if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
           continue;
Index: llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -353,3 +353,28 @@
   bb.1:
     RET_ReallyLR
 ...
+---
+# Check that non-base registers are considered live when finding a
+# scratch register by making sure we don't use $x2 for the scratch
+# register for the inserted ORRXrs.
+# CHECK-LABEL: name: hwpf_offreg
+# CHECK: $x3 = ORRXrs $xzr, $x1, 0
+# CHECK: $w10 = LDRWroX $x3, $x2, 0, 0
+name: hwpf_offreg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $x1, $x2, $x17, $x18
+
+    $w10 = LDRWroX $x1, $x2, 0, 0 :: ("aarch64-strided-access" load 4)
+
+    $x2 = ORRXrs $xzr, $x10, 0
+    $w26 = LDRWroX $x1, $x2, 0, 0
+
+    $w0 = SUBWri $w0, 1, 0
+    $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
+    Bcc 9, %bb.0, implicit $nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
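
Usage sketch (not part of the patch above): the counter name "falkor-hwpf" is taken from the DEBUG_COUNTER line in the diff, and the skip/count option spelling below assumes the generic LLVM DebugCounter command-line convention rather than anything stated in this change. Passing, for example,

    llc ... -debug-counter=falkor-hwpf-skip=10,falkor-hwpf-count=1 ...

would make the pass leave the first 10 tag collisions it encounters unfixed and then fix exactly one, which is the usual way to bisect a suspected bad collision fix down to a single transformation.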