diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -21,6 +21,7 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H #define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -60,12 +61,14 @@ /// Initialize the field members using \p MF. void init(MachineFunction &MF); + typedef SmallSetVector LocalizedSetVecT; + /// Do inter-block localization from the entry block. bool localizeInterBlock(MachineFunction &MF, - SmallPtrSetImpl &LocalizedInstrs); + LocalizedSetVecT &LocalizedInstrs); /// Do intra-block localization of already localized instructions. - bool localizeIntraBlock(SmallPtrSetImpl &LocalizedInstrs); + bool localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs); public: Localizer(); diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -76,6 +75,7 @@ case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_INTTOPTR: return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); @@ -104,8 +104,8 @@ return InsertMBB == Def.getParent(); } -bool Localizer::localizeInterBlock( - MachineFunction &MF, SmallPtrSetImpl &LocalizedInstrs) { +bool Localizer::localizeInterBlock(MachineFunction &MF, + LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; DenseMap, unsigned> MBBWithLocalDef; @@ -114,7 +114,8 @@ // we only localize instructions in the entry block here. This might change if // we start doing CSE across blocks. auto &MBB = MF.front(); - for (MachineInstr &MI : MBB) { + for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) { + MachineInstr &MI = *RI; if (!shouldLocalize(MI)) continue; LLVM_DEBUG(dbgs() << "Should localize: " << MI); @@ -166,8 +167,7 @@ return Changed; } -bool Localizer::localizeIntraBlock( - SmallPtrSetImpl &LocalizedInstrs) { +bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; // For each already-localized instruction which has multiple users, then we @@ -179,15 +179,16 @@ for (MachineInstr *MI : LocalizedInstrs) { unsigned Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); - // If the instruction has a single use, we would have already moved it right - // before its user in localizeInterBlock(). - if (MRI->hasOneUse(Reg)) - continue; - // All of the user MIs of this reg. SmallPtrSet Users; - for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) - Users.insert(&UseMI); + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { + if (!UseMI.isPHI()) + Users.insert(&UseMI); + } + // If all the users were PHIs then they're not going to be in our block, + // don't try to move this instruction. + if (Users.empty()) + continue; MachineBasicBlock::iterator II(MI); ++II; @@ -216,7 +217,7 @@ // Keep track of the instructions we localized. We'll do a second pass of // intra-block localization to further reduce live ranges. - SmallPtrSet LocalizedInstrs; + LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); return Changed |= localizeIntraBlock(LocalizedInstrs); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -38,6 +38,8 @@ ret i32 0 } + define void @test_inttoptr() { ret void } + ... --- @@ -350,8 +352,8 @@ ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2) - ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 + ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1) ; CHECK: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 ; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3) @@ -388,3 +390,63 @@ RET_ReallyLR implicit $w0 ... +--- +name: test_inttoptr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_inttoptr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128 + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[C2:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0 + ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C2]](s64) + ; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64) + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: G_STORE [[ADD]](s32), [[COPY1]](p0) :: (store 4) + ; CHECK: [[C3:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128 + ; CHECK: [[INTTOPTR2:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C3]](s64) + ; CHECK: $x0 = COPY [[INTTOPTR2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK: bb.2: + ; CHECK: [[C4:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0 + ; CHECK: [[INTTOPTR3:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C4]](s64) + ; CHECK: $x0 = COPY [[INTTOPTR3]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + bb.1: + liveins: $w0, $x1 + + %0:gpr(s32) = COPY $w0 + %1:gpr(p0) = COPY $x1 + %2:gpr(s64) = G_CONSTANT i64 128 + %4:gpr(s32) = G_CONSTANT i32 0 + %7:gpr(s64) = G_CONSTANT i64 0 + %6:gpr(p0) = G_INTTOPTR %7(s64) + %3:gpr(p0) = G_INTTOPTR %2(s64) + %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %4 + %5:gpr(s1) = G_TRUNC %9(s32) + G_BRCOND %5(s1), %bb.2 + G_BR %bb.3 + + bb.2: + %8:gpr(s32) = G_ADD %0, %0 + G_STORE %8(s32), %1(p0) :: (store 4) + $x0 = COPY %3(p0) + RET_ReallyLR implicit $x0 + + bb.3: + $x0 = COPY %6(p0) + RET_ReallyLR implicit $x0 + +...