diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -24,10 +24,20 @@
   [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
   (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
 
+// AArch64-specific offset folding for G_GLOBAL_VALUE.
+def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
+def fold_global_offset : GICombineRule<
+  (defs root:$root, fold_global_offset_matchdata:$matchinfo),
+  (match (wip_match_opcode G_GLOBAL_VALUE):$root,
+          [{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
                                            fconstant_to_constant,
-                                           icmp_redundant_trunc]> {
+                                           icmp_redundant_trunc,
+                                           fold_global_offset]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5628,8 +5628,10 @@
     return None;
 
   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
-  // TODO: Need to check GV's offset % size if doing offset folding into globals.
-  assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+  auto Offset = Adrp.getOperand(1).getOffset();
+  if (Offset % Size != 0)
+    return None;
+
   auto GV = Adrp.getOperand(1).getGlobal();
   if (GV->isThreadLocal())
     return None;
@@ -5643,7 +5645,7 @@
   Register AdrpReg = Adrp.getOperand(0).getReg();
   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
            [=](MachineInstrBuilder &MIB) {
-             MIB.addGlobalAddress(GV, /* Offset */ 0,
+             MIB.addGlobalAddress(GV, Offset,
                                   OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
            }}};
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -781,7 +781,8 @@
   // G_ADD_LOW instructions.
   // By splitting this here, we can optimize accesses in the small code model by
   // folding in the G_ADD_LOW into the load/store offset.
-  auto GV = MI.getOperand(1).getGlobal();
+  auto &GlobalOp = MI.getOperand(1);
+  const auto *GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
 
@@ -791,9 +792,10 @@
   if (OpFlags & AArch64II::MO_GOT)
     return true;
 
+  auto Offset = GlobalOp.getOffset();
   Register DstReg = MI.getOperand(0).getReg();
   auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
-                  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
   // Set the regclass on the dest reg too.
   MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
 
@@ -811,6 +813,8 @@
   // binary must also be loaded into address range [0, 2^48). Both of these
   // properties need to be ensured at runtime when using tagged addresses.
   if (OpFlags & AArch64II::MO_TAGGED) {
+    assert(!Offset &&
+           "Should not have folded in an offset for a tagged global!");
     ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
                .addGlobalAddress(GV, 0x100000000,
                                  AArch64II::MO_PREL | AArch64II::MO_G3)
@@ -819,7 +823,7 @@
   }
 
   MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
-      .addGlobalAddress(GV, 0,
+      .addGlobalAddress(GV, Offset,
                         OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
   MI.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -107,6 +107,116 @@
   return true;
 }
 
+/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
+///
+/// e.g.
+///
+/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
+static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+  MachineFunction &MF = *MI.getMF();
+  auto &GlobalOp = MI.getOperand(1);
+  auto *GV = GlobalOp.getGlobal();
+
+  // Don't allow anything that could represent offsets etc.
+  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
+          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
+    return false;
+
+  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
+  //
+  //  %g = G_GLOBAL_VALUE @x
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  %ptr2 = G_PTR_ADD %g, cst2
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // Identify the *smallest* constant. We want to be able to form this:
+  //
+  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
+  //  %g = G_PTR_ADD %offset_g, -min_cst
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  ...
+  Register Dst = MI.getOperand(0).getReg();
+  uint64_t MinOffset = -1ull;
+  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
+    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
+      return false;
+    auto Cst =
+        getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
+    if (!Cst)
+      return false;
+    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
+  }
+
+  // Require that the new offset is larger than the existing one to avoid
+  // infinite loops.
+  uint64_t CurrOffset = GlobalOp.getOffset();
+  uint64_t NewOffset = MinOffset + CurrOffset;
+  if (NewOffset <= CurrOffset)
+    return false;
+
+  // Check whether folding this offset is legal. It must not go out of bounds
+  // of the referenced object to avoid violating the code model, and must be
+  // smaller than 2^21; larger offsets cannot be expressed in all object
+  // formats.
+  //
+  // This check also prevents us from folding negative offsets, which will end
+  // up being treated in the same way as large positive ones. They could also
+  // cause code model violations, and aren't really common enough to matter.
+  if (NewOffset >= (1 << 21))
+    return false;
+
+  Type *T = GV->getValueType();
+  if (!T->isSized() ||
+      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+    return false;
+  MatchInfo = std::make_pair(NewOffset, MinOffset);
+  return true;
+}
+
+static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  MachineIRBuilder &B,
+                                  GISelChangeObserver &Observer,
+                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
+  // Change:
+  //
+  //  %g = G_GLOBAL_VALUE @x
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  %ptr2 = G_PTR_ADD %g, cst2
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // To:
+  //
+  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
+  //  %g = G_PTR_ADD %offset_g, -min_cst
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // Then, the original G_PTR_ADDs should be folded later on so that they look
+  // like this:
+  //
+  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
+  uint64_t Offset, MinOffset;
+  std::tie(Offset, MinOffset) = MatchInfo;
+  B.setInstrAndDebugLoc(MI);
+  Observer.changingInstr(MI);
+  auto &GlobalOp = MI.getOperand(1);
+  auto *GV = GlobalOp.getGlobal();
+  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
+  Register Dst = MI.getOperand(0).getReg();
+  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
+  MI.getOperand(0).setReg(NewGVDst);
+  Observer.changedInstr(MI);
+  B.buildPtrAdd(
+      Dst, NewGVDst,
+      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir
@@ -0,0 +1,241 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=DEFAULT,CHECK
+# RUN: llc -mtriple aarch64-apple-darwin -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-MACHO,CHECK
+# RUN: llc -mtriple aarch64-apple-darwin -code-model=small -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=SMALL-MACHO,CHECK
+# RUN: llc -mtriple aarch64-linux-elf -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-ELF,CHECK
+# RUN: llc -mtriple aarch64-linux-elf -code-model=tiny -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=TINY,CHECK
+# RUN: llc -mtriple aarch64-windows-coff -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
+
+# Each of these tests has a trivial pattern for folding a G_PTR_ADD into a
+# G_GLOBAL_VALUE.
+#
+# Check that, given different code models/target features, we do/don't fold.
+
+--- |
+  @external_linkage = external hidden global i32
+  @common_linkage = common local_unnamed_addr global i32 0, align 4
+  @internal_linkage = internal unnamed_addr global i32 0, align 4
+  @extern_weak_linkage = extern_weak hidden global i32
+  @dll_import = external dllimport global i32
+
+  define void @test_external_linkage() { ret void }
+  define void @test_internal_linkage() { ret void }
+  define void @test_common_linkage() { ret void }
+  define void @test_extern_weak_linkage() { ret void }
+  define void @never_fold_tagged_globals() #0 { ret void }
+  define void @test_dll_import() { ret void }
+
+  attributes #0 = { "target-features"="+tagged-globals" }
+...
+---
+name: test_external_linkage
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    ; Large + Mach-O goes via GOT, so we can't fold.
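+    ; (matchFoldGlobalOffset only fires when ClassifyGlobalReference returns
+    ; MO_NO_FLAG, so any reference that has to go through the GOT is rejected
+    ; before the offset is even considered.)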
+ + ; DEFAULT-LABEL: name: test_external_linkage + ; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + ; DEFAULT: $x0 = COPY [[GV]](p0) + ; DEFAULT: RET_ReallyLR implicit $x0 + ; LARGE-MACHO-LABEL: name: test_external_linkage + ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @external_linkage + ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-MACHO: $x0 = COPY %ptr_add(p0) + ; LARGE-MACHO: RET_ReallyLR implicit $x0 + ; SMALL-MACHO-LABEL: name: test_external_linkage + ; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + ; SMALL-MACHO: $x0 = COPY [[GV]](p0) + ; SMALL-MACHO: RET_ReallyLR implicit $x0 + ; LARGE-ELF-LABEL: name: test_external_linkage + ; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + ; LARGE-ELF: $x0 = COPY [[GV]](p0) + ; LARGE-ELF: RET_ReallyLR implicit $x0 + ; TINY-LABEL: name: test_external_linkage + ; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + ; TINY: $x0 = COPY [[GV]](p0) + ; TINY: RET_ReallyLR implicit $x0 + ; WINDOWS-LABEL: name: test_external_linkage + ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + ; WINDOWS: $x0 = COPY [[GV]](p0) + ; WINDOWS: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @external_linkage + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: test_internal_linkage +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; Large + Mach-O goes via GOT, so we can't fold. + + ; DEFAULT-LABEL: name: test_internal_linkage + ; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1 + ; DEFAULT: $x0 = COPY [[GV]](p0) + ; DEFAULT: RET_ReallyLR implicit $x0 + ; LARGE-MACHO-LABEL: name: test_internal_linkage + ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @internal_linkage + ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-MACHO: $x0 = COPY %ptr_add(p0) + ; LARGE-MACHO: RET_ReallyLR implicit $x0 + ; SMALL-MACHO-LABEL: name: test_internal_linkage + ; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1 + ; SMALL-MACHO: $x0 = COPY [[GV]](p0) + ; SMALL-MACHO: RET_ReallyLR implicit $x0 + ; LARGE-ELF-LABEL: name: test_internal_linkage + ; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1 + ; LARGE-ELF: $x0 = COPY [[GV]](p0) + ; LARGE-ELF: RET_ReallyLR implicit $x0 + ; TINY-LABEL: name: test_internal_linkage + ; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1 + ; TINY: $x0 = COPY [[GV]](p0) + ; TINY: RET_ReallyLR implicit $x0 + ; WINDOWS-LABEL: name: test_internal_linkage + ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1 + ; WINDOWS: $x0 = COPY [[GV]](p0) + ; WINDOWS: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @internal_linkage + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 + +... 
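+# A common-linkage symbol may be merged with another definition at link time,
+# so its reference is not classified as MO_NO_FLAG on the non-COFF targets;
+# per the checks below, only the COFF run (where @common_linkage is known to
+# be DSO-local) performs the fold.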
+--- +name: test_common_linkage +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; DEFAULT-LABEL: name: test_common_linkage + ; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @common_linkage + ; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1 + ; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; DEFAULT: $x0 = COPY %ptr_add(p0) + ; DEFAULT: RET_ReallyLR implicit $x0 + ; LARGE-MACHO-LABEL: name: test_common_linkage + ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage + ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-MACHO: $x0 = COPY %ptr_add(p0) + ; LARGE-MACHO: RET_ReallyLR implicit $x0 + ; SMALL-MACHO-LABEL: name: test_common_linkage + ; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage + ; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; SMALL-MACHO: $x0 = COPY %ptr_add(p0) + ; SMALL-MACHO: RET_ReallyLR implicit $x0 + ; LARGE-ELF-LABEL: name: test_common_linkage + ; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @common_linkage + ; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-ELF: $x0 = COPY %ptr_add(p0) + ; LARGE-ELF: RET_ReallyLR implicit $x0 + ; TINY-LABEL: name: test_common_linkage + ; TINY: %global:_(p0) = G_GLOBAL_VALUE @common_linkage + ; TINY: %imm:_(s64) = G_CONSTANT i64 1 + ; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; TINY: $x0 = COPY %ptr_add(p0) + ; TINY: RET_ReallyLR implicit $x0 + ; WINDOWS-LABEL: name: test_common_linkage + ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @common_linkage + 1 + ; WINDOWS: $x0 = COPY [[GV]](p0) + ; WINDOWS: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @common_linkage + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 + +... 
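+# An extern_weak symbol may resolve to a null address at link time, and a
+# folded-in offset would turn that null into a small non-null pointer, so no
+# configuration folds in the following test.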
+--- +name: test_extern_weak_linkage +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; DEFAULT-LABEL: name: test_extern_weak_linkage + ; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1 + ; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; DEFAULT: $x0 = COPY %ptr_add(p0) + ; DEFAULT: RET_ReallyLR implicit $x0 + ; LARGE-MACHO-LABEL: name: test_extern_weak_linkage + ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-MACHO: $x0 = COPY %ptr_add(p0) + ; LARGE-MACHO: RET_ReallyLR implicit $x0 + ; SMALL-MACHO-LABEL: name: test_extern_weak_linkage + ; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1 + ; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; SMALL-MACHO: $x0 = COPY %ptr_add(p0) + ; SMALL-MACHO: RET_ReallyLR implicit $x0 + ; LARGE-ELF-LABEL: name: test_extern_weak_linkage + ; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1 + ; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; LARGE-ELF: $x0 = COPY %ptr_add(p0) + ; LARGE-ELF: RET_ReallyLR implicit $x0 + ; TINY-LABEL: name: test_extern_weak_linkage + ; TINY: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; TINY: %imm:_(s64) = G_CONSTANT i64 1 + ; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; TINY: $x0 = COPY %ptr_add(p0) + ; TINY: RET_ReallyLR implicit $x0 + ; WINDOWS-LABEL: name: test_extern_weak_linkage + ; WINDOWS: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + ; WINDOWS: %imm:_(s64) = G_CONSTANT i64 1 + ; WINDOWS: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + ; WINDOWS: $x0 = COPY %ptr_add(p0) + ; WINDOWS: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: never_fold_tagged_globals +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; CHECK-LABEL: name: never_fold_tagged_globals + ; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @external_linkage + 1 + %global:_(p0) = G_GLOBAL_VALUE @external_linkage + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 + +... 
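+# With +tagged-globals, the legalizer materializes the address tag with MOVKs
+# and asserts that no offset was folded into the G_GLOBAL_VALUE (see the
+# AArch64LegalizerInfo.cpp hunk above), so the combine must never fold one in.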
+--- +name: test_dll_import +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; CHECK-LABEL: name: test_dll_import + ; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @dll_import + 1 + %global:_(p0) = G_GLOBAL_VALUE @dll_import + %imm:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir @@ -0,0 +1,284 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-darwin -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- | + @g = external hidden global i32 + + %opaque = type opaque + @unsized = external hidden global %opaque + + define void @one_ptr_add() { ret void } + define void @add_to_offset() { ret void } + define void @two_ptr_adds_same_offset() { ret void } + define void @two_ptr_adds_different_offset() { ret void } + define void @ptr_add_chain() { ret void } + + define void @dont_fold_negative_offset() { ret void } + define void @dont_min_offset_less_than_curr_offset() { ret void } + define void @dont_fold_max_offset() { ret void } + define void @dont_fold_offset_larger_than_type_alloc() { ret void } + define void @dont_fold_unsized_type() { ret void } +... +--- +name: one_ptr_add +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + + ; We should fold the offset 1 into the G_GLOBAL_VALUE. + + ; CHECK-LABEL: name: one_ptr_add + ; CHECK: liveins: $x0 + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1 + ; CHECK: $x0 = COPY [[GV]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @g + %offset:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64) + $x0 = COPY %ptr_add + RET_ReallyLR implicit $x0 + +... +--- +name: add_to_offset +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + + ; We should fold the offset 1 into the G_GLOBAL_VALUE, resulting in a + ; final offset of 4. + + ; CHECK-LABEL: name: add_to_offset + ; CHECK: liveins: $x0 + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 4 + ; CHECK: $x0 = COPY [[GV]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @g + 3 + %offset:_(s64) = G_CONSTANT i64 1 + %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64) + $x0 = COPY %ptr_add + RET_ReallyLR implicit $x0 + +... +--- +name: two_ptr_adds_same_offset +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; We're allowed to have more than one G_PTR_ADD use. We should fold 1 into + ; the G_GLOBAL_VALUE's offset. 
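+    ; Both users share the constant 1, so MinOffset = 1; after the global is
+    ; rewritten to @g + 1, the inserted -1 G_PTR_ADD cancels against the
+    ; original +1s, and both stores end up addressing the folded global
+    ; directly.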
+ + ; CHECK-LABEL: name: two_ptr_adds_same_offset + ; CHECK: liveins: $x0, $x1 + ; CHECK: %val1:_(s64) = COPY $x0 + ; CHECK: %val2:_(s64) = COPY $x1 + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1 + ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8) + ; CHECK: G_STORE %val2(s64), [[GV]](p0) :: (store 8) + ; CHECK: RET_ReallyLR implicit $x0 + %val1:_(s64) = COPY $x0 + %val2:_(s64) = COPY $x1 + %global:_(p0) = G_GLOBAL_VALUE @g + %offset:_(s64) = G_CONSTANT i64 1 + %ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64) + %ptr_add2:_(p0) = G_PTR_ADD %global, %offset(s64) + G_STORE %val1:_(s64), %ptr_add1 :: (store 8) + G_STORE %val2:_(s64), %ptr_add2 :: (store 8) + RET_ReallyLR implicit $x0 + +... +--- +name: two_ptr_adds_different_offset +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + ; The lowest offset G_PTR_ADD (2) should be folded into the G_GLOBAL_VALUE. + ; + ; The other G_PTR_ADD should have its offset decremented by 2. + + ; CHECK-LABEL: name: two_ptr_adds_different_offset + ; CHECK: liveins: $x0, $x1 + ; CHECK: %val1:_(s64) = COPY $x0 + ; CHECK: %val2:_(s64) = COPY $x1 + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 2 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: %ptr_add2:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) + ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8) + ; CHECK: G_STORE %val2(s64), %ptr_add2(p0) :: (store 8) + ; CHECK: RET_ReallyLR implicit $x0 + %val1:_(s64) = COPY $x0 + %val2:_(s64) = COPY $x1 + %global:_(p0) = G_GLOBAL_VALUE @g + %offset1:_(s64) = G_CONSTANT i64 2 + %offset2:_(s64) = G_CONSTANT i64 10 + %ptr_add1:_(p0) = G_PTR_ADD %global, %offset1(s64) + %ptr_add2:_(p0) = G_PTR_ADD %global, %offset2(s64) + G_STORE %val1:_(s64), %ptr_add1 :: (store 8) + G_STORE %val2:_(s64), %ptr_add2 :: (store 8) + RET_ReallyLR implicit $x0 + +... +--- +name: ptr_add_chain +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + ; We should be able to fold all of the G_PTR_ADDs, except for the last one + ; into the G_GLOBAL_VALUE. + ; + ; (TypeAllocSize = 4, so the offset on the G_GLOBAL_VALUE can't go above + ; that.) + + ; CHECK-LABEL: name: ptr_add_chain + ; CHECK: liveins: $x0 + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: %dont_fold_me:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) + ; CHECK: $x0 = COPY %dont_fold_me(p0) + ; CHECK: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @g + %offset:_(s64) = G_CONSTANT i64 1 + %ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64) + %ptr_add2:_(p0) = G_PTR_ADD %ptr_add1, %offset(s64) + %ptr_add3:_(p0) = G_PTR_ADD %ptr_add2, %offset(s64) + %ptr_add4:_(p0) = G_PTR_ADD %ptr_add3, %offset(s64) + %dont_fold_me:_(p0) = G_PTR_ADD %ptr_add4, %offset(s64) + $x0 = COPY %dont_fold_me + RET_ReallyLR implicit $x0 + +... +--- +name: dont_fold_negative_offset +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + + ; Do not add negative offsets to G_GLOBAL_VALUE. 
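+    ; The matcher reads each constant with getZExtValue, so -1 becomes a huge
+    ; unsigned offset that fails the 1 << 21 bound; negative offsets are
+    ; therefore never folded.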
+
+    ; CHECK-LABEL: name: dont_fold_negative_offset
+    ; CHECK: liveins: $x0
+    ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+    ; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
+    ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    ; CHECK: $x0 = COPY %ptr_add(p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %global:_(p0) = G_GLOBAL_VALUE @g
+    %offset:_(s64) = G_CONSTANT i64 -1
+    %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    $x0 = COPY %ptr_add
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_min_offset_less_than_curr_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; Do not create smaller offsets. Ensures combine termination.
+
+    ; CHECK-LABEL: name: dont_min_offset_less_than_curr_offset
+    ; CHECK: liveins: $x0
+    ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g + 3
+    ; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
+    ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    ; CHECK: $x0 = COPY %ptr_add(p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %global:_(p0) = G_GLOBAL_VALUE @g + 3
+    %offset:_(s64) = G_CONSTANT i64 -1
+    %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    $x0 = COPY %ptr_add
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_max_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; Folded offsets must be smaller than 1 << 21, the strictest limit the
+    ; object file formats impose. This one (2^32 - 2^21) is far above that,
+    ; so don't fold it.
+
+    ; CHECK-LABEL: name: dont_fold_max_offset
+    ; CHECK: liveins: $x0
+    ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+    ; CHECK: %offset:_(s64) = G_CONSTANT i64 4292870144
+    ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    ; CHECK: $x0 = COPY %ptr_add(p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %global:_(p0) = G_GLOBAL_VALUE @g
+    %offset:_(s64) = G_CONSTANT i64 4292870144 ; 2^32 - 2^21
+    %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    $x0 = COPY %ptr_add
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_offset_larger_than_type_alloc
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+
+    ; Type alloc size = 4, offset = 16. Don't fold.
+
+    ; CHECK-LABEL: name: dont_fold_offset_larger_than_type_alloc
+    ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+    ; CHECK: %offset:_(s64) = G_CONSTANT i64 16
+    ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    ; CHECK: $x0 = COPY %ptr_add(p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %global:_(p0) = G_GLOBAL_VALUE @g
+    %offset:_(s64) = G_CONSTANT i64 16
+    %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+    $x0 = COPY %ptr_add(p0)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_unsized_type
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    ; Check that we don't touch unsized globals.
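+    ; @unsized has an opaque type, so there is no alloc size to bound the
+    ; offset against; matchFoldGlobalOffset rejects it via the isSized()
+    ; check.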
+ + ; CHECK-LABEL: name: dont_fold_unsized_type + ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @unsized + ; CHECK: %offset:_(s64) = G_CONSTANT i64 16 + ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64) + ; CHECK: $x0 = COPY %ptr_add(p0) + ; CHECK: RET_ReallyLR implicit $x0 + %global:_(p0) = G_GLOBAL_VALUE @unsized + %offset:_(s64) = G_CONSTANT i64 16 + %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64) + $x0 = COPY %ptr_add(p0) + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir @@ -6,6 +6,7 @@ target triple = "aarch64--" @var = external global i8 define i8* @test_global() { ret i8* undef } + define i8* @test_global_with_offset() { ret i8* undef } ... --- name: test_global @@ -17,15 +18,6 @@ ; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code ; model isn't 'Small'. - ; CHECK-LABEL: name: test_global - ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var - ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0) - ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - ; CMLARGE-LABEL: name: test_global - ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) - ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64) ; PIC-LABEL: name: test_global ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) @@ -34,3 +26,17 @@ %1:_(s64) = G_PTRTOINT %0 $x0 = COPY %1 ... +--- +name: test_global_with_offset +registers: + - { id: 0, class: _ } +body: | + bb.0: + ; PIC-LABEL: name: test_global_with_offset + ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1 + ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) + ; PIC: $x0 = COPY [[PTRTOINT]](s64) + %0(p0) = G_GLOBAL_VALUE @var + 1 + %1:_(s64) = G_PTRTOINT %0 + $x0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir @@ -7,6 +7,7 @@ target triple = "aarch64--" @var = external dso_local global i8 define i8* @test_global() { ret i8* undef } + define i8* @test_global_with_offset() { ret i8* undef } ... --- name: test_global @@ -17,16 +18,11 @@ ; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code ; model isn't 'Small'. - ; CHECK-LABEL: name: test_global ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0) ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - ; PIC-LABEL: name: test_global - ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) - ; PIC: $x0 = COPY [[PTRTOINT]](s64) ; CMLARGE-LABEL: name: test_global ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) @@ -35,3 +31,23 @@ %1:_(s64) = G_PTRTOINT %0 $x0 = COPY %1 ... 
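+# Both halves of the small-code-model lowering must carry the same addend:
+# ADRP computes the 4 KiB page of (@var + 1) and G_ADD_LOW supplies the low
+# 12 bits, so both relocations need the full @var + 1 target.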
+--- +name: test_global_with_offset +body: | + bb.0: + ; When we legalize into ADRP + G_ADD_LOW, both should inherit the offset + ; from the original G_GLOBAL_VALUE. + ; + ; CHECK-LABEL: name: test_global_with_offset + ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var + 1 + ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var + 1 + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0) + ; CHECK: $x0 = COPY [[PTRTOINT]](s64) + ; CMLARGE-LABEL: name: test_global_with_offset + ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1 + ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) + ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64) + %0:_(p0) = G_GLOBAL_VALUE @var + 1 + %1:_(s64) = G_PTRTOINT %0 + $x0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + + @x = external hidden local_unnamed_addr global i32*, align 8 + + define void @select_add_low_without_offset() { ret void } + define void @select_add_low_with_offset() { ret void } + define void @select_add_low_without_adrp() { ret void } + +... +--- +name: select_add_low_without_offset +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: select_add_low_without_offset + ; CHECK: liveins: $x0 + ; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x, target-flags(aarch64-pageoff, aarch64-nc) @x + ; CHECK: $x0 = COPY %add_low + ; CHECK: RET_ReallyLR implicit $x0 + %copy:gpr(p0) = COPY $x0 + %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + $x0 = COPY %add_low + RET_ReallyLR implicit $x0 + +... +--- +name: select_add_low_with_offset +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: select_add_low_with_offset + ; CHECK: liveins: $x0 + ; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x + 1, target-flags(aarch64-pageoff, aarch64-nc) @x + 1 + ; CHECK: $x0 = COPY %add_low + ; CHECK: RET_ReallyLR implicit $x0 + %copy:gpr(p0) = COPY $x0 + %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 1 + %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 1 + $x0 = COPY %add_low + RET_ReallyLR implicit $x0 + +... 
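+# If the G_ADD_LOW's pointer operand is not an ADRP (below it arrives in a
+# register), there is no page half to pair with, so selection falls back to
+# an ADDXri of the :lo12: fragment instead of forming a MOVaddr.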
+--- +name: select_add_low_without_adrp +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: select_add_low_without_adrp + ; CHECK: liveins: $x0 + ; CHECK: %ptr:gpr64sp = COPY $x0 + ; CHECK: %add_low:gpr64sp = ADDXri %ptr, target-flags(aarch64-pageoff, aarch64-nc) @x, 0 + ; CHECK: $x0 = COPY %add_low + ; CHECK: RET_ReallyLR implicit $x0 + %ptr:gpr(p0) = COPY $x0 + %add_low:gpr(p0) = G_ADD_LOW %ptr(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + $x0 = COPY %add_low + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir @@ -0,0 +1,38 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=LARGE +# RUN: llc -mtriple=aarch64 -code-model=small -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=SMALL +# RUN: llc -mtriple=aarch64 -code-model=tiny -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=TINY + +--- | + @g = external hidden global i32 + define void @select_gv_with_offset() { ret void } +... +--- +name: select_gv_with_offset +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; LARGE-LABEL: name: select_gv_with_offset + ; LARGE: liveins: $x0 + ; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @g + 1, 0 + ; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @g + 1, 16 + ; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @g + 1, 32 + ; LARGE: %g:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @g + 1, 48 + ; LARGE: $x0 = COPY %g + ; LARGE: RET_ReallyLR implicit $x0 + ; SMALL-LABEL: name: select_gv_with_offset + ; SMALL: liveins: $x0 + ; SMALL: %g:gpr64 = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1 + ; SMALL: $x0 = COPY %g + ; SMALL: RET_ReallyLR implicit $x0 + ; TINY-LABEL: name: select_gv_with_offset + ; TINY: liveins: $x0 + ; TINY: %g:gpr64 = ADR @g + 1 + ; TINY: $x0 = COPY %g + ; TINY: RET_ReallyLR implicit $x0 + %g:gpr(p0) = G_GLOBAL_VALUE @g + 1 + $x0 = COPY %g(p0) + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -41,7 +41,8 @@ @x = external hidden local_unnamed_addr global i32*, align 8 define void @store_adrp_add_low() { ret void } - + define void @store_adrp_add_low_foldable_offset() { ret void } + define void @store_adrp_add_low_unfoldable_offset() { ret void } ... --- @@ -622,3 +623,43 @@ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) + +... 
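+# The next two tests exercise the selector's new Offset % Size check: STRXui
+# scales its immediate by the access size (8 here), so @x + 8 folds straight
+# into the store's addressing mode, while @x + 3 has to be materialized with
+# a MOVaddr first.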
+--- +name: store_adrp_add_low_foldable_offset +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: store_adrp_add_low_foldable_offset + ; CHECK: liveins: $x0 + ; CHECK: %copy:gpr64all = COPY $x0 + ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x + 8 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy + ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x + 8 :: (store 8 into @x) + %copy:gpr(p0) = COPY $x0 + %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 8 + %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 8 + G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) + +... +--- +name: store_adrp_add_low_unfoldable_offset +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: store_adrp_add_low_unfoldable_offset + ; CHECK: liveins: $x0 + ; CHECK: %copy:gpr64all = COPY $x0 + ; CHECK: %add_low:gpr64common = MOVaddr target-flags(aarch64-page) @x + 3, target-flags(aarch64-pageoff, aarch64-nc) @x + 3 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy + ; CHECK: STRXui [[COPY]], %add_low, 0 :: (store 8 into @x) + %copy:gpr(p0) = COPY $x0 + %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3 + %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3 + G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll --- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll +++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll @@ -1,69 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc < %s -global-isel -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=GISEL @x1 = external hidden global [2 x i64] @x2 = external hidden global [16777216 x i64] @x3 = external hidden global { [9 x i8*], [8 x i8*] } define i64 @f1() { - ; CHECK: f1: - ; CHECK: adrp x8, x1+16 - ; CHECK: ldr x0, [x8, :lo12:x1+16] +; CHECK-LABEL: f1: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, x1+16 +; CHECK-NEXT: ldr x0, [x8, :lo12:x1+16] +; CHECK-NEXT: ret +; +; GISEL-LABEL: f1: +; GISEL: // %bb.0: +; GISEL-NEXT: adrp x8, x1+16 +; GISEL-NEXT: ldr x0, [x8, :lo12:x1+16] +; GISEL-NEXT: ret %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2) ret i64 %l } define i64 @f2() { - ; CHECK: f2: - ; CHECK: adrp x8, x1 - ; CHECK: add x8, x8, :lo12:x1 - ; CHECK: ldr x0, [x8, #24] +; CHECK-LABEL: f2: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, x1 +; CHECK-NEXT: add x8, x8, :lo12:x1 +; CHECK-NEXT: ldr x0, [x8, #24] +; CHECK-NEXT: ret +; +; GISEL-LABEL: f2: +; GISEL: // %bb.0: +; GISEL-NEXT: adrp x8, x1 +; GISEL-NEXT: add x8, x8, :lo12:x1 +; GISEL-NEXT: ldr x0, [x8, #24] +; GISEL-NEXT: ret + %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3) ret i64 %l } define i64 @f3() { - ; CHECK: f3: - ; CHECK: adrp x8, x1+1 - ; CHECK: add x8, x8, :lo12:x1+1 - ; CHECK: ldr x0, [x8] +; CHECK-LABEL: f3: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, x1+1 +; CHECK-NEXT: add x8, x8, :lo12:x1+1 +; CHECK-NEXT: ldr x0, [x8] +; CHECK-NEXT: ret +; +; GISEL-LABEL: f3: +; GISEL: // %bb.0: +; GISEL-NEXT: adrp x8, x1+1 +; GISEL-NEXT: add x8, x8, :lo12:x1+1 +; GISEL-NEXT: ldr x0, [x8] +; GISEL-NEXT: ret %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), 
i64 1) to i64*)
   ret i64 %l
 }
 
 define [2 x i64] @f4() {
-  ; CHECK: f4:
-  ; CHECK: adrp x8, x2+8
-  ; CHECK: add x8, x8, :lo12:x2+8
-  ; CHECK: ldp x0, x1, [x8]
+; FIXME: GlobalISel misses the opportunity to form a LDP here.
+;
+; CHECK-LABEL: f4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, x2+8
+; CHECK-NEXT:    add x8, x8, :lo12:x2+8
+; CHECK-NEXT:    ldp x0, x1, [x8]
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: f4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x9, x2+8
+; GISEL-NEXT:    adrp x8, x2+8
+; GISEL-NEXT:    add x9, x9, :lo12:x2+8
+; GISEL-NEXT:    ldr x0, [x8, :lo12:x2+8]
+; GISEL-NEXT:    ldr x1, [x9, #8]
+; GISEL-NEXT:    ret
   %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
   ret [2 x i64] %l
 }
 
 define i64 @f5() {
-  ; CHECK: f5:
-  ; CHECK: adrp x8, x2+2097144
-  ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
-  ; CHECK: ret
+; CHECK-LABEL: f5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, x2+2097144
+; CHECK-NEXT:    ldr x0, [x8, :lo12:x2+2097144]
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: f5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, x2+2097144
+; GISEL-NEXT:    ldr x0, [x8, :lo12:x2+2097144]
+; GISEL-NEXT:    ret
   %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
   ret i64 %l
 }
 
 define i64 @f6() {
-  ; CHECK: f6:
-  ; CHECK: adrp x8, x2
-  ; CHECK: add x8, x8, :lo12:x2
-  ; CHECK: mov w9, #2097152
-  ; CHECK: ldr x0, [x8, x9]
-  ; CHECK: ret
+; CHECK-LABEL: f6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, x2
+; CHECK-NEXT:    add x8, x8, :lo12:x2
+; CHECK-NEXT:    mov w9, #2097152
+; CHECK-NEXT:    ldr x0, [x8, x9]
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: f6:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x9, x2
+; GISEL-NEXT:    mov w8, #2097152
+; GISEL-NEXT:    add x9, x9, :lo12:x2
+; GISEL-NEXT:    ldr x0, [x9, x8]
+; GISEL-NEXT:    ret
   %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
   ret i64 %l
 }
 
 define i32 @f7() {
+; FIXME: GlobalISel doesn't handle vectors well.
+;
+; CHECK-LABEL: f7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, x3+108
+; CHECK-NEXT:    ldr w0, [x8, :lo12:x3+108]
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: f7:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    adrp x8, x3+88
+; GISEL-NEXT:    add x8, x8, :lo12:x3+88
+; GISEL-NEXT:    mov v0.d[1], x8
+; GISEL-NEXT:    mov w9, #64
+; GISEL-NEXT:    mov d1, v0.d[1]
+; GISEL-NEXT:    sub x8, x9, #64 // =64
+; GISEL-NEXT:    fmov x11, d1
+; GISEL-NEXT:    fmov x10, d0
+; GISEL-NEXT:    lsl x12, x11, x8
+; GISEL-NEXT:    cmp x9, #64 // =64
+; GISEL-NEXT:    lsr x8, x11, x8
+; GISEL-NEXT:    orr x11, x12, x10, lsr #0
+; GISEL-NEXT:    csel x8, x11, x8, lo
+; GISEL-NEXT:    cmp x9, #0 // =0
+; GISEL-NEXT:    csel x8, x10, x8, eq
+; GISEL-NEXT:    ldr w0, [x8, #20]
+; GISEL-NEXT:    ret
+
 entry:
-  ; CHECK: f7
-  ; CHECK: adrp x8, x3+108
-  ; CHECK: ldr w0, [x8, :lo12:x3+108]
   %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
   ret i32 %l
 }
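; For reference, the offset arithmetic in f7: the second field of
; { [9 x i8*], [8 x i8*] } starts at byte 72, element 2 of that field sits at
; 72 + 16 = 88, and the final gep adds 5 * 4 = 20 more, giving x3+108. SDAG
; folds the full 108 into the relocation; GlobalISel (per the FIXME) only
; reaches x3+88 and leaves the remaining +20 in the load.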