diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCUtil.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -75,6 +76,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -20757,6 +20759,33 @@ bool AArch64TargetLowering::shouldLocalize( const MachineInstr &MI, const TargetTransformInfo *TTI) const { + auto &MF = *MI.getMF(); + auto &MRI = MF.getRegInfo(); + auto maxUses = [](unsigned RematCost) { + // A cost of 1 means remats are basically free. + if (RematCost == 1) + return UINT_MAX; + if (RematCost == 2) + return 2U; + + // Remat is too expensive, only sink if there's one user. + if (RematCost > 2) + return 1U; + llvm_unreachable("Unexpected remat cost"); + }; + // Helper to walk through uses and terminate if we've reached a limit. Saves + // us spending time traversing uses if all we want to know is if it's >= min. + auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { + unsigned NumUses = 0; + auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end(); + for (; UI != UE && NumUses < MaxUses; ++UI) { + NumUses++; + } + // If we haven't reached the end yet then there are more than MaxUses users. + return UI == UE; + }; + + switch (MI.getOpcode()) { case TargetOpcode::G_GLOBAL_VALUE: { // On Darwin, TLS global vars get selected into function calls, which @@ -20767,6 +20796,19 @@ return false; break; } + case TargetOpcode::G_CONSTANT: { + auto *CI = MI.getOperand(1).getCImm(); + APInt Imm = CI->getValue(); + InstructionCost Cost = TTI->getIntImmCost( + Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize); + assert(Cost.isValid() && "Expected a valid imm cost"); + + unsigned RematCost = *Cost.getValue(); + Register Reg = MI.getOperand(0).getReg(); + unsigned MaxUses = maxUses(RematCost); + bool B = isUsesAtMost(Reg, MaxUses); + return B; + } // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being // localizable. case AArch64::ADRP: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -114,3 +114,58 @@ ret i32 0 } +define i32 @imm_cost_too_large() { + ; CHECK-LABEL: name: imm_cost_too_large + ; CHECK: bb.1.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 -2228259 + ; CHECK-NEXT: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 + ; CHECK-NEXT: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 + ; CHECK-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ICMP]], [[C2]] + ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.4 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 + ; CHECK-NEXT: G_STORE [[C]](s32), [[GV3]](p0) :: (store (s32) into @var2) + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.then2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 + ; CHECK-NEXT: G_STORE [[C]](s32), [[GV4]](p0) :: (store (s32) into @var1) + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.if.end: + ; CHECK-NEXT: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 + ; CHECK-NEXT: G_STORE [[C]](s32), [[GV5]](p0) :: (store (s32) into @var3) + ; CHECK-NEXT: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY [[C3]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %0 = load i32, i32* @var1, align 4 + %cst1 = bitcast i32 -2228259 to i32 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %cst1, i32* @var2 + br label %if.then2 + +if.then2: + store i32 %cst1, i32* @var1 + br label %if.end + +if.end: + store i32 %cst1, i32* @var3 + ret i32 0 +}