diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -736,7 +736,6 @@ // situation (as used for tail calls) is implementation-defined, so we // cannot rely on the linker replacing the tail call with a return. if (Info.Callee.isGlobal()) { - errs() << "IsGlobal in CallLowering\n"; const GlobalValue *GV = Info.Callee.getGlobal(); const Triple &TT = MF.getTarget().getTargetTriple(); if (GV->hasExternalWeakLinkage() && diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1553,7 +1553,6 @@ : MRI.createVirtualRegister(&AArch64::GPR64RegClass); auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); if (auto *GV = dyn_cast(V)) { - errs() << "GlobalValue "; I.print(errs()); errs() << "\n"; MovI->addOperand(MF, MachineOperand::CreateGA( GV, MovZ->getOperand(1).getOffset(), Flags)); } else { @@ -1788,12 +1787,6 @@ assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); - errs() << "WTF opcode type are you? " << I.getOpcode() << ", " - << TargetOpcode::G_GLOBAL_VALUE << ", "; I.dump(); errs() << "\n"; - - - errs() << " AAAA \n"; - MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1806,12 +1799,9 @@ return false; } - errs() << " BBBB \n"; - unsigned Opcode = I.getOpcode(); // G_PHI requires same handling as PHI if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { - errs() << " PHIIII \n"; // Certain non-generic instructions also need some special handling. 
if (Opcode == TargetOpcode::LOAD_STACK_GUARD) @@ -1850,8 +1840,6 @@ return true; } - errs() << " cccc \n"; - if (I.getNumOperands() != I.getNumExplicitOperands()) { LLVM_DEBUG( @@ -1882,11 +1870,8 @@ MachineIRBuilder MIB(I); - errs() << " DDDD \n"; - switch (Opcode) { case TargetOpcode::G_BRCOND: { - errs() << " G_BRCOND \n"; if (Ty.getSizeInBits() > 32) { // We shouldn't need this on AArch64, but it would be implemented as an // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the @@ -1930,17 +1915,12 @@ } case TargetOpcode::G_BRINDIRECT: { - errs() << " G_BRINDIRECT \n"; I.setDesc(TII.get(AArch64::BR)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - case TargetOpcode::G_BRJT: { - errs() << " G_BRJT \n"; + case TargetOpcode::G_BRJT: return selectBrJT(I, MRI); - } - - errs() << " ZZZZ \n"; case AArch64::G_ADD_LOW: { // This op may have been separated from it's ADRP companion by the localizer @@ -1965,15 +1945,8 @@ Op2.getTargetFlags()); I.eraseFromParent(); return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); - - errs() << " G_ADD_LOW \n"; - // I.setDesc(TII.get(AArch64::ADDXri)); - // I.addOperand(MachineOperand::CreateImm(0)); - // return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - errs() << " FFFF \n"; - case TargetOpcode::G_BSWAP: { // Handle vector types for G_BSWAP directly. Register DstReg = I.getOperand(0).getReg(); @@ -2011,8 +1984,6 @@ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - errs() << " GGGG \n"; - case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_CONSTANT: { const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; @@ -2065,8 +2036,6 @@ } } - errs() << " HHHH \n"; - // We allow G_CONSTANT of types < 32b. const unsigned MovOpc = DefSize == 64 ? 
AArch64::MOVi64imm : AArch64::MOVi32imm; @@ -2234,10 +2203,7 @@ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - errs() << " JJJJ \n"; - case TargetOpcode::G_GLOBAL_VALUE: { - errs() << "G_GLOBAL_VALUE "; I.print(errs()); errs() << "\n"; auto GV = I.getOperand(1).getGlobal(); if (GV->isThreadLocal()) return selectTLSGlobalValue(I, MRI); @@ -2264,8 +2230,6 @@ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - errs() << " KKKK \n"; - case TargetOpcode::G_ZEXTLOAD: case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { @@ -2332,8 +2296,6 @@ } } - errs() << " LLLL \n"; - // If we haven't folded anything into our addressing mode yet, try to fold // a frame index into the base+offset. if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) @@ -5240,7 +5202,6 @@ // TODO: Need to check GV's offset % size if doing offset folding into globals. assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); auto GV = Adrp.getOperand(1).getGlobal(); - errs() << "Trying to fold GV\n"; if (GV->isThreadLocal()) return None; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -660,8 +660,6 @@ // G_ADD_LOW instructions. // By splitting this here, we can optimize accesses in the small code model by // folding in the G_ADD_LOW into the load/store offset. - errs() << "Legalizing small global\n"; - auto GV = MI.getOperand(1).getGlobal(); if (GV->isThreadLocal()) return true; // Don't want to modify TLS vars. @@ -678,36 +676,31 @@ // Set the regclass on the dest reg too. MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - errs() << "ADRP: "; ADRP->dump(); errs() << "\n"; - + // MO_TAGGED on the page indicates a tagged address. Set the tag now. 
We do so + // by creating a MOVK that sets bits 48-63 of the register to (global address + // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to + // prevent an incorrect tag being generated during relocation when the + // global appears before the code section. Without the offset, a global at + // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced + // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = + // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` + // instead of `0xf`. + // This assumes that we're in the small code model so we can assume a binary + // size of <= 4GB, which makes the untagged PC relative offset positive. The + // binary must also be loaded into address range [0, 2^48). Both of these + // properties need to be ensured at runtime when using tagged addresses. if (OpFlags & AArch64II::MO_TAGGED) { auto Tag = MI.getOperand(1); Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); Tag.setOffset(0x100000000); - auto TaggedADRP = - MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {}) - .addGlobalAddress(GV, 0, AArch64II::MO_PREL | AArch64II::MO_G3); - errs() << "TaggedADRP: "; TaggedADRP->dump(); errs() << "\n"; - - // auto TaggedAddr = MIRBuilder.buildInstr(AArch64::ADDXri, {DstReg}, {TaggedADRP}) - // .addGlobalAddress(GV, 0, - // OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - // errs() << "TaggedAddr: "; TaggedAddr->dump(); errs() << "\n"; - - auto Addr = - MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {TaggedADRP}) - .addGlobalAddress( - GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - errs() << "Addr: "; Addr->dump(); errs() << "\n"; - - MI.eraseFromParent(); - return true; + ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) + .addGlobalAddress(GV, 0x100000000, + AArch64II::MO_PREL | AArch64II::MO_G3); } - auto Addr = MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) + 
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - errs() << "Addr: "; Addr->dump(); errs() << "\n"; MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AArch64/tagged-globals.ll b/llvm/test/CodeGen/AArch64/tagged-globals.ll --- a/llvm/test/CodeGen/AArch64/tagged-globals.ll +++ b/llvm/test/CodeGen/AArch64/tagged-globals.ll @@ -1,20 +1,39 @@ -; RUN: llc --relocation-model=static < %s | FileCheck %s -; RUN: llc --relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC -; RUN: llc --aarch64-enable-global-isel-at-O=2 -O2 < %s | FileCheck %s -; RUN: llc --aarch64-enable-global-isel-at-O=-1 < %s | FileCheck %s +; RUN: llc --relocation-model=static < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-STATIC,CHECK-SELECTIONDAGISEL +; RUN: llc --relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-PIC + +; Ensure that GlobalISel lowers correctly. GlobalISel is the default ISel for +; -O0 on aarch64. GlobalISel lowers the instruction sequence in the static +; relocation model differently to SelectionDAGISel. GlobalISel does the lowering +; of AddLow *after* legalization, and thus doesn't differentiate between +; address-taken-only vs. address-taken-for-loadstore. Hence, we generate a movk +; instruction for load/store instructions as well with GlobalISel. GlobalISel +; also doesn't have the scaffolding to correctly check the bounds of the global +; offset, and cannot fold the lo12 bits into the load/store. Neither of these +; things is a problem as GlobalISel is only used by default at -O0, so we don't +; mind the code size and performance increase. 
+ +; RUN: llc --aarch64-enable-global-isel-at-O=0 -O0 < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-STATIC,CHECK-GLOBALISEL +; RUN: llc --aarch64-enable-global-isel-at-O=0 -O0 --relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-PIC target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-android" -@global = external hidden global i32 +@global = external global i32 declare void @func() define i32* @global_addr() #0 { - ; CHECK: global_addr: - ; CHECK: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global - ; CHECK: movk [[REG]], #:prel_g3:global+4294967296 - ; CHECK: add x0, [[REG]], :lo12:global - ; CHECK: ret + ; Static relocation model has common codegen between SelectionDAGISel and + ; GlobalISel when the address-taken of a global isn't folded into a load or + ; store instruction. + ; CHECK-STATIC: global_addr: + ; CHECK-STATIC: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global + ; CHECK-STATIC: movk [[REG]], #:prel_g3:global+4294967296 + ; CHECK-STATIC: add x0, [[REG]], :lo12:global + ; CHECK-STATIC: ret ; CHECK-PIC: global_addr: ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global @@ -24,54 +43,64 @@ ret i32* @global } -; define i32 @global_load() #0 { -; ; CHECK: global_load: -; ; CHECK: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global -; ; CHECK: movk [[REG]], #:prel_g3:global+4294967296 -; ; CHECK: add [[REG]], [[REG]], :lo12:global -; ; CHECK: ldr w0, {{\[}}[[REG]]{{\]}} -; ; CHECK: ret -; -; ; CHECK-PIC: global_load: -; ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global -; ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global] -; ; CHECK-PIC: ldr w0, {{\[}}[[REG]]{{\]}} -; ; CHECK-PIC: ret -; -; %load = load i32, i32* @global -; ret i32 %load -; } -; -; define void @global_store() #0 { -; ; CHECK: global_store: -; ; CHECK: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global -; ; CHECK: movk [[REG]], #:prel_g3:global+4294967296 -; ; CHECK: add [[REG]], [[REG]], :lo12:global -; ; CHECK: str wzr, {{\[}}[[REG]]{{\]}} -; ; 
CHECK: ret -; -; ; CHECK-PIC: global_store: -; ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global -; ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global] -; ; CHECK-PIC: str wzr, {{\[}}[[REG]]{{\]}} -; ; CHECK-PIC: ret -; -; store i32 0, i32* @global -; ret void -; } -; -; define void ()* @func_addr() #0 { -; ; CHECK: func_addr: -; ; CHECK: adrp [[REG:x[0-9]+]], func -; ; CHECK: add x0, [[REG]], :lo12:func -; ; CHECK: ret -; -; ; CHECK-PIC: func_addr: -; ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:func -; ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:func] -; ; CHECK-PIC: ret -; -; ret void ()* @func -; } +define i32 @global_load() #0 { + ; CHECK-SELECTIONDAGISEL: global_load: + ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global + ; CHECK-SELECTIONDAGISEL: ldr w0, {{\[}}[[REG]], :lo12:global{{\]}} + ; CHECK-SELECTIONDAGISEL: ret + + ; CHECK-GLOBALISEL: global_load: + ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global + ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296 + ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global + ; CHECK-GLOBALISEL: ldr w0, {{\[}}[[REG]]{{\]}} + ; CHECK-GLOBALISEL: ret + + ; CHECK-PIC: global_load: + ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global + ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global] + ; CHECK-PIC: ldr w0, {{\[}}[[REG]]{{\]}} + ; CHECK-PIC: ret + + %load = load i32, i32* @global + ret i32 %load +} + +define void @global_store() #0 { + ; CHECK-SELECTIONDAGISEL: global_store: + ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global + ; CHECK-SELECTIONDAGISEL: str wzr, {{\[}}[[REG]], :lo12:global{{\]}} + ; CHECK-SELECTIONDAGISEL: ret + + ; CHECK-GLOBALISEL: global_store: + ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global + ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296 + ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global + ; CHECK-GLOBALISEL: str wzr, {{\[}}[[REG]]{{\]}} + ; CHECK-GLOBALISEL: ret + + ; CHECK-PIC: global_store: + ; 
CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global + ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global] + ; CHECK-PIC: str wzr, {{\[}}[[REG]]{{\]}} + ; CHECK-PIC: ret + + store i32 0, i32* @global + ret void +} + +define void ()* @func_addr() #0 { + ; CHECK-STATIC: func_addr: + ; CHECK-STATIC: adrp [[REG:x[0-9]+]], func + ; CHECK-STATIC: add x0, [[REG]], :lo12:func + ; CHECK-STATIC: ret + + ; CHECK-PIC: func_addr: + ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:func + ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:func] + ; CHECK-PIC: ret + + ret void ()* @func +} attributes #0 = { "target-features"="+tagged-globals" }