diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1919,9 +1919,28 @@
     return selectBrJT(I, MRI);
 
   case AArch64::G_ADD_LOW: {
-    I.setDesc(TII.get(AArch64::ADDXri));
-    I.addOperand(MachineOperand::CreateImm(0));
-    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    // This op may have been separated from its ADRP companion by the localizer
+    // or some other code motion pass. Given that many CPUs will try to
+    // macro fuse these operations anyway, select this into a MOVaddr pseudo
+    // which will later be expanded into an ADRP+ADD pair after scheduling.
+    MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
+    if (BaseMI->getOpcode() != AArch64::ADRP) {
+      I.setDesc(TII.get(AArch64::ADDXri));
+      I.addOperand(MachineOperand::CreateImm(0));
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
+    assert(TM.getCodeModel() == CodeModel::Small &&
+           "Expected small code model");
+    MachineIRBuilder MIB(I);
+    auto Op1 = BaseMI->getOperand(1);
+    auto Op2 = I.getOperand(2);
+    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
+                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
+                                         Op1.getTargetFlags())
+                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
+                                         Op2.getTargetFlags());
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
   }
 
   case TargetOpcode::G_BSWAP: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
@@ -50,10 +50,11 @@
 ; CHECK-NEXT:    .cfi_offset w26, -80
 ; CHECK-NEXT:    .cfi_offset w27, -88
 ; CHECK-NEXT:    .cfi_offset w28, -96
-; CHECK-NEXT:    mov x27, x8
-; CHECK-NEXT:    adrp x8, _asdf@PAGE
 ; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    add x0, x8, _asdf@PAGEOFF
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x0, _asdf@PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    add x0, x0, _asdf@PAGEOFF
 ; CHECK-NEXT:    mov x20, x1
 ; CHECK-NEXT:    mov x21, x2
 ; CHECK-NEXT:    mov x22, x3
@@ -65,6 +66,7 @@
 ; CHECK-NEXT:    stp q3, q2, [sp, #64] ; 32-byte Folded Spill
 ; CHECK-NEXT:    stp q5, q4, [sp, #32] ; 32-byte Folded Spill
 ; CHECK-NEXT:    stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT:    mov x27, x8
 ; CHECK-NEXT:    bl _puts
 ; CHECK-NEXT:    ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
 ; CHECK-NEXT:    ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
@@ -87,6 +89,7 @@
 ; CHECK-NEXT:    ldp x28, x27, [sp, #128] ; 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #224 ; =224
 ; CHECK-NEXT:    b _musttail_variadic_callee
+; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
   call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
   %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...)
   ret i32 %r
@@ -189,16 +192,16 @@
 ; CHECK-NEXT:    ldr x9, [x0, #8]
 ; CHECK-NEXT:    br x9
 ; CHECK-NEXT:  LBB5_2: ; %else
-; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:  Lloh2:
 ; CHECK-NEXT:    adrp x10, _g@GOTPAGE
 ; CHECK-NEXT:    ldr x9, [x0, #16]
-; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:  Lloh3:
 ; CHECK-NEXT:    ldr x10, [x10, _g@GOTPAGEOFF]
 ; CHECK-NEXT:    mov w11, #42
-; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:  Lloh4:
 ; CHECK-NEXT:    str w11, [x10]
 ; CHECK-NEXT:    br x9
-; CHECK-NEXT:    .loh AdrpLdrGotStr Lloh0, Lloh1, Lloh2
+; CHECK-NEXT:    .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4
   %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
   %cond = load i1, i1* %cond_p
   br i1 %cond, label %then, label %else