diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14296,13 +14296,23 @@
 bool AArch64TargetLowering::shouldLocalize(
     const MachineInstr &MI, const TargetTransformInfo *TTI) const {
-  if (MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_GLOBAL_VALUE: {
     // On Darwin, TLS global vars get selected into function calls, which
     // we don't want localized, as they can get moved into the middle of
     // another call sequence.
     const GlobalValue &GV = *MI.getOperand(1).getGlobal();
     if (GV.isThreadLocal() && Subtarget->isTargetMachO())
       return false;
+    break;
+  }
+  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
+  // localizable.
+  case AArch64::ADRP:
+  case AArch64::G_ADD_LOW:
+    return true;
+  default:
+    break;
   }
   return TargetLoweringBase::shouldLocalize(MI, TTI);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -0,0 +1,26 @@
+//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 GlobalISel target pseudo instruction definitions. This is kept
+// separate from the other tablegen files for organizational purposes, but
+// shares the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+class AArch64GenericInstruction : GenericInstruction {
+  let Namespace = "AArch64";
+}
+
+// A pseudo to represent a relocatable add instruction as part of address
+// computation.
+def G_ADD_LOW : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src, type2:$imm);
+  let hasSideEffects = 0;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7590,3 +7590,5 @@
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
+
+include "AArch64InstrGISel.td"
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -221,6 +221,11 @@
     return selectAddrModeUnscaled(Root, 16);
   }

+  /// Helper to try to fold a G_ADD_LOW into an immediate, to be used
+  /// from complex pattern matchers like selectAddrModeIndexed().
+  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
+                                          MachineRegisterInfo &MRI) const;
+
   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                            unsigned Size) const;
   template
@@ -1603,6 +1608,18 @@
     return contractCrossBankCopyIntoStore(I, MRI);
   case TargetOpcode::G_PTR_ADD:
     return convertPtrAddToAdd(I, MRI);
+  case TargetOpcode::G_LOAD: {
+    // For scalar loads of pointers, we try to convert the dest type from p0
+    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
+    // conversion, this should be ok because all users should have been
+    // selected already, so the type doesn't matter for them.
+    Register DstReg = I.getOperand(0).getReg();
+    const LLT DstTy = MRI.getType(DstReg);
+    if (!DstTy.isPointer())
+      return false;
+    MRI.setType(DstReg, LLT::scalar(64));
+    return true;
+  }
   default:
     return false;
   }
@@ -1782,7 +1799,7 @@
   unsigned Opcode = I.getOpcode();
   // G_PHI requires same handling as PHI
-  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
+  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
     // Certain non-generic instructions also need some special handling.

     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
@@ -1903,6 +1920,12 @@
   case TargetOpcode::G_BRJT:
     return selectBrJT(I, MRI);

+  case AArch64::G_ADD_LOW: {
+    I.setDesc(TII.get(AArch64::ADDXri));
+    I.addOperand(MachineOperand::CreateImm(0));
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
+
   case TargetOpcode::G_BSWAP: {
     // Handle vector types for G_BSWAP directly.
     Register DstReg = I.getOperand(0).getReg();
@@ -5153,14 +5176,51 @@
   return None;
 }

+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
+                                                 unsigned Size,
+                                                 MachineRegisterInfo &MRI) const {
+  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
+    return None;
+  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
+  if (Adrp.getOpcode() != AArch64::ADRP)
+    return None;
+
+  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
+  // TODO: Need to check GV's offset % size if doing offset folding into globals.
+  assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+  auto GV = Adrp.getOperand(1).getGlobal();
+  if (GV->isThreadLocal())
+    return None;
+
+  unsigned Alignment = GV->getAlignment();
+  Type *Ty = GV->getValueType();
+  auto &MF = *RootDef.getParent()->getParent();
+  if (Alignment == 0 && Ty->isSized())
+    Alignment = MF.getDataLayout().getABITypeAlignment(Ty);
+
+  if (Alignment < Size)
+    return None;
+
+  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
+  MachineIRBuilder MIRBuilder(RootDef);
+  Register AdrpReg = Adrp.getOperand(0).getReg();
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
+           [=](MachineInstrBuilder &MIB) {
+             MIB.addGlobalAddress(GV, /* Offset */ 0,
+                                  OpFlags | AArch64II::MO_PAGEOFF |
+                                      AArch64II::MO_NC);
+           }}};
+}
+
 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
 /// "Size" argument is the size in bytes of the memory reference, which
 /// determines the scale.
 InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                   unsigned Size) const {
-  MachineRegisterInfo &MRI =
-      Root.getParent()->getParent()->getParent()->getRegInfo();
+  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   if (!Root.isReg())
     return None;

@@ -5176,6 +5236,14 @@
     }};
   }

+  CodeModel::Model CM = MF.getTarget().getCodeModel();
+  // Check if we can fold in the ADD of a small code model ADRP + ADD address.
+  if (CM == CodeModel::Small) {
+    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
+    if (OpFns)
+      return OpFns;
+  }
+
   if (isBaseWithConstantOffset(Root, MRI)) {
     MachineOperand &LHS = RootDef->getOperand(1);
     MachineOperand &RHS = RootDef->getOperand(2);
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -43,6 +43,11 @@
   bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &MIRBuilder,
                            GISelChangeObserver &Observer) const;
+
+  bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  MachineIRBuilder &MIRBuilder,
+                                  GISelChangeObserver &Observer) const;
+
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -30,7 +30,8 @@
 using namespace LegalizeMutations;
 using namespace LegalityPredicates;

-AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
+AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
+    : ST(&ST) {
   using namespace TargetOpcode;
   const LLT p0 = LLT::pointer(0, 64);
   const LLT s1 = LLT::scalar(1);
@@ -52,6 +53,8 @@
   const LLT v2s64 = LLT::vector(2, 64);
   const LLT v2p0 = LLT::vector(2, p0);

+  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
+
   // FIXME: support subtargets which have neon/fp-armv8 disabled.
   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
     computeTables();
@@ -413,7 +416,11 @@
   // Pointer-handling
   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
-  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+
+  if (TM.getCodeModel() == CodeModel::Small)
+    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
+  else
+    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

   getActionDefinitionsBuilder(G_PTRTOINT)
       .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
@@ -634,11 +641,46 @@
   case TargetOpcode::G_ASHR:
   case TargetOpcode::G_LSHR:
     return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
+  case TargetOpcode::G_GLOBAL_VALUE:
+    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
   }

   llvm_unreachable("expected switch to return");
 }

+bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+    GISelChangeObserver &Observer) const {
+  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
+  // G_ADD_LOW instructions.
+  // By splitting this here, we can optimize accesses in the small code model by
+  // folding the G_ADD_LOW into the load/store offset.
+  auto GV = MI.getOperand(1).getGlobal();
+  if (GV->isThreadLocal())
+    return true; // Don't want to modify TLS vars.
+
+  MIRBuilder.setInstrAndDebugLoc(MI);
+  auto &TM = ST->getTargetLowering()->getTargetMachine();
+  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
+
+  if (OpFlags & AArch64II::MO_GOT)
+    return true;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
+                  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+  // Set the regclass on the dest reg too.
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) + .addGlobalAddress(GV, 0, + OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + MI.eraseFromParent(); + return true; +} + bool AArch64LegalizerInfo::legalizeIntrinsic( MachineInstr &MI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -50,11 +50,10 @@ ; CHECK-NEXT: .cfi_offset w26, -80 ; CHECK-NEXT: .cfi_offset w27, -88 ; CHECK-NEXT: .cfi_offset w28, -96 +; CHECK-NEXT: mov x27, x8 +; CHECK-NEXT: adrp x8, _asdf@PAGE ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x0, _asdf@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF +; CHECK-NEXT: add x0, x8, _asdf@PAGEOFF ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 @@ -66,7 +65,6 @@ ; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill -; CHECK-NEXT: mov x27, x8 ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload @@ -89,7 +87,6 @@ ; CHECK-NEXT: ldp x28, x27, [sp, #128] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #224 ; =224 ; CHECK-NEXT: b _musttail_variadic_callee -; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0)) %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...) 
ret i32 %r @@ -192,16 +189,16 @@ ; CHECK-NEXT: ldr x9, [x0, #8] ; CHECK-NEXT: br x9 ; CHECK-NEXT: LBB5_2: ; %else -; CHECK-NEXT: Lloh2: +; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x10, _g@GOTPAGE ; CHECK-NEXT: ldr x9, [x0, #16] -; CHECK-NEXT: Lloh3: +; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF] ; CHECK-NEXT: mov w11, #42 -; CHECK-NEXT: Lloh4: +; CHECK-NEXT: Lloh2: ; CHECK-NEXT: str w11, [x10] ; CHECK-NEXT: br x9 -; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4 +; CHECK-NEXT: .loh AdrpLdrGotStr Lloh0, Lloh1, Lloh2 %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0 %cond = load i1, i1* %cond_p br i1 %cond, label %then, label %else diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir @@ -54,7 +54,7 @@ body: | bb.1.entry: %0:_(s8) = G_IMPLICIT_DEF - %4:_(p0) = G_GLOBAL_VALUE @.str, debug-location !DILocation(line: 0, scope: !4) + %4:_(p0) = COPY $x0 %10:_(p0) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !4) %1:_(s1) = G_TRUNC %0(s8) %2:_(s32) = G_ZEXT %1(s1) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir @@ -27,8 +27,9 @@ ; CHECK: bb.0 (%ir-block.0): ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @addr - ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[GV]](p0) :: (store 8 into @addr) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @addr + ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @addr + ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store 8 into @addr) ; CHECK: G_BRINDIRECT [[BLOCK_ADDR]](p0) ; CHECK: bb.1.block (address-taken): ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -12,8 +12,6 @@ entry: ret void } - @var = global i8 0 - define i8* @test_global() { ret i8* undef } ... --- @@ -84,19 +82,3 @@ %3:_(s32) = G_ANYEXT %2 $w0 = COPY %3 ... - ---- -name: test_global -registers: - - { id: 0, class: _ } -body: | - bb.0: - - ; CHECK-LABEL: name: test_global - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) - ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - %0(p0) = G_GLOBAL_VALUE @var - %1:_(s64) = G_PTRTOINT %0 - $x0 = COPY %1 -... 
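For reference, under the small code model the custom legalization added above rewrites a G_GLOBAL_VALUE into the ADRP + G_ADD_LOW pair. A rough sketch of the resulting generic MIR (virtual register numbers are illustrative; this is the shape the new legalize-global.mir test below checks):

    %1:gpr64(p0) = ADRP target-flags(aarch64-page) @var
    %0:_(p0) = G_ADD_LOW %1(p0), target-flags(aarch64-pageoff, aarch64-nc) @var

GOT (PIC) accesses and the large code model keep the single G_GLOBAL_VALUE, as the PIC and CMLARGE run lines in that test verify.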
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir @@ -0,0 +1,38 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer --relocation-model=pic %s -o - | FileCheck %s --check-prefix=PIC +# RUN: llc -O0 -run-pass=legalizer --code-model=large %s -o - | FileCheck %s --check-prefix=CMLARGE + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + @var = external global i8 + define i8* @test_global() { ret i8* undef } +... +--- +name: test_global +registers: + - { id: 0, class: _ } +body: | + bb.0: + + ; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code + ; model isn't 'Small'. + + ; CHECK-LABEL: name: test_global + ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var + ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0) + ; CHECK: $x0 = COPY [[PTRTOINT]](s64) + ; PIC-LABEL: name: test_global + ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) + ; PIC: $x0 = COPY [[PTRTOINT]](s64) + ; CMLARGE-LABEL: name: test_global + ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) + ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64) + %0(p0) = G_GLOBAL_VALUE @var + %1:_(s64) = G_PTRTOINT %0 + $x0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -73,8 +73,8 @@ # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # # DEBUG-NEXT: G_GLOBAL_VALUE (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 1, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # # DEBUG-NEXT: G_EXTRACT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -37,6 +37,22 @@ if.end: ret i32 0 } + define i32 @adrp_add() { + entry: + %0 = load i32, i32* @var1, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end + + if.then: + store i32 2, i32* @var2, align 4 + store i32 3, i32* @var1, align 4 + store i32 2, i32* @var3, align 4 + store i32 3, i32* @var1, align 4 + br label %if.end + + if.end: + ret i32 0 + } define void @test_inttoptr() { ret void } define void @many_local_use_intra_block() { ret void } @@ -392,6 +408,78 @@ ... 
--- +name: adrp_add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: adrp_add + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var1 + ; CHECK: %addlow1:gpr(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var1 + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 + ; CHECK: [[ADRP1:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var2 + ; CHECK: %addlow2:gpr(p0) = G_ADD_LOW [[ADRP1]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var2 + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 + ; CHECK: [[ADRP2:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 + ; CHECK: %addlow3:gpr(p0) = G_ADD_LOW [[ADRP2]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 + ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[ADRP]](p0) :: (load 4 from @var1) + ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[ADRP3:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var2 + ; CHECK: [[ADD_LOW:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP3]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var2 + ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 + ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW]](p0) :: (store 4 into @var2) + ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 + ; CHECK: [[ADRP4:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var1 + ; CHECK: [[ADD_LOW1:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP4]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var1 + ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1) + ; CHECK: [[ADRP5:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 + ; CHECK: [[ADD_LOW2:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP5]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 + ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW2]](p0) :: (store 4 into @var3) + ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1) + ; CHECK: bb.2.if.end: + ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: $w0 = COPY [[C6]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + + ; Some of these instructions are dead. + bb.1.entry: + %1:gpr64(p0) = ADRP target-flags(aarch64-page) @var1 + %addlow1:gpr(p0) = G_ADD_LOW %1(p0), target-flags(aarch64-pageoff, aarch64-nc) @var1 + %2:gpr(s32) = G_CONSTANT i32 1 + %4:gpr(s32) = G_CONSTANT i32 2 + %5:gpr64(p0) = ADRP target-flags(aarch64-page) @var2 + %addlow2:gpr(p0) = G_ADD_LOW %5(p0), target-flags(aarch64-pageoff, aarch64-nc) @var2 + %6:gpr(s32) = G_CONSTANT i32 3 + %7:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 + %addlow3:gpr(p0) = G_ADD_LOW %7(p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 + %8:gpr(s32) = G_CONSTANT i32 0 + %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1) + %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2 + %3:gpr(s1) = G_TRUNC %9(s32) + G_BRCOND %3(s1), %bb.2 + G_BR %bb.3 + + bb.2.if.then: + G_STORE %4(s32), %addlow2(p0) :: (store 4 into @var2) + G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1) + G_STORE %4(s32), %addlow3(p0) :: (store 4 into @var3) + G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1) + + bb.3.if.end: + $w0 = COPY %8(s32) + RET_ReallyLR implicit $w0 + +... 
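Because shouldLocalize() now returns true for ADRP and G_ADD_LOW, the localizer is free to sink the whole address computation next to each use, which is what the adrp_add test above checks. After instruction selection the pair becomes a plain ADRP + ADDXri; a sketch of the selected MIR (register classes and numbering are approximate):

    %2:gpr64 = ADRP target-flags(aarch64-page) @var1
    %3:gpr64sp = ADDXri %2, target-flags(aarch64-pageoff, aarch64-nc) @var1, 0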
+--- name: test_inttoptr alignment: 4 legalized: true diff --git a/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll b/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll --- a/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll +++ b/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll @@ -52,7 +52,7 @@ ; RUN: -mattr=+call-saved-x18 \ ; RUN: -global-isel \ ; RUN: -o - %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-SAVED-ALL +; RUN: --check-prefix=CHECK-SAVED-ALL-GISEL ; Used to exhaust the supply of GPRs. @var = global [30 x i64] zeroinitializer @@ -124,6 +124,17 @@ ; CHECK-SAVED-ALL-DAG: ldr x15 ; CHECK-SAVED-ALL-DAG: ldr x18 +; CHECK-SAVED-ALL-GISEL: adrp x16, var +; CHECK-SAVED-ALL-GISEL-DAG: ldr x8 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x9 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x10 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x11 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x12 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x13 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x14 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x15 +; CHECK-SAVED-ALL-GISEL-DAG: ldr x18 + call void @callee() ; CHECK: bl callee diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll --- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll @@ -41,12 +41,10 @@ ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] -; FIXME: GlobalISel doesn't fold ands/adds into load/store addressing modes -; right now/ So, we won't get the :lo12:var. ; GISEL-LABEL: test_load_i8: ; GISEL: ldxrb w[[LOADVAL:[0-9]+]], [x0] ; GISEL-NOT: uxtb -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) %shortval = trunc i64 %val to i8 %extval = zext i8 %shortval to i64 @@ -65,7 +63,7 @@ ; GISEL-LABEL: test_load_i16: ; GISEL: ldxrh w[[LOADVAL:[0-9]+]], [x0] ; GISEL-NOT: uxtb -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 @@ -84,7 +82,7 @@ ; GISEL-LABEL: test_load_i32: ; GISEL: ldxr w[[LOADVAL:[0-9]+]], [x0] ; GISEL-NOT: uxtb -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 @@ -101,7 +99,7 @@ ; GISEL-LABEL: test_load_i64: ; GISEL: ldxr x[[LOADVAL:[0-9]+]], [x0] ; GISEL-NOT: uxtb -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void @@ -218,11 +216,9 @@ ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] -; FIXME: GlobalISel doesn't fold ands/adds into load/store addressing modes -; right now/ So, we won't get the :lo12:var. 
; GISEL-LABEL: test_load_acquire_i8: ; GISEL: ldaxrb w[[LOADVAL:[0-9]+]], [x0] -; GISEL-DAG: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL-DAG: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) %shortval = trunc i64 %val to i8 %extval = zext i8 %shortval to i64 @@ -240,7 +236,7 @@ ; GISEL-LABEL: test_load_acquire_i16: ; GISEL: ldaxrh w[[LOADVAL:[0-9]+]], [x0] -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 @@ -258,7 +254,7 @@ ; GISEL-LABEL: test_load_acquire_i32: ; GISEL: ldaxr w[[LOADVAL:[0-9]+]], [x0] -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 @@ -274,7 +270,7 @@ ; GISEL-LABEL: test_load_acquire_i64: ; GISEL: ldaxr x[[LOADVAL:[0-9]+]], [x0] -; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void diff --git a/llvm/test/CodeGen/AArch64/dllimport.ll b/llvm/test/CodeGen/AArch64/dllimport.ll --- a/llvm/test/CodeGen/AArch64/dllimport.ll +++ b/llvm/test/CodeGen/AArch64/dllimport.ll @@ -28,8 +28,7 @@ ; DAG-ISEL: ldr w0, [x8, ext] ; FAST-ISEL: add x8, x8, ext ; FAST-ISEL: ldr w0, [x8] -; GLOBAL-ISEL-FALLBACK: add x8, x8, ext -; GLOBAL-ISEL-FALLBACK: ldr w0, [x8] +; GLOBAL-ISEL-FALLBACK: ldr w0, [x8, ext] ; CHECK: ret define i32* @get_var_pointer() {
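Taken together, the net effect on a simple global access under the small code model is that the low-part add is folded into the load/store addressing mode instead of being emitted separately. A before/after sketch in AArch64 assembly (registers are illustrative; this matches the updated checks in arm64-ldxr-stxr.ll and dllimport.ll):

Before (separate add):

    adrp x8, var
    add  x8, x8, :lo12:var
    ldr  w0, [x8]

After (offset folded into the load):

    adrp x8, var
    ldr  w0, [x8, :lo12:var]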