Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -150,6 +150,7 @@
 
   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
 
+  bool tryAdrLoad(SDNode *N);
   bool tryIndexedLoad(SDNode *N);
 
   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -1094,6 +1095,81 @@
   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
 }
 
+/// Try to select a load whose address is produced by an AArch64ISD::ADR node
+/// as a single literal load (LDRWl, LDRXl or LDRSWl), e.g. "ldr w0, sym"
+/// instead of "adr x8, sym; ldr w0, [x8]".
+///
+/// Only unindexed loads with an i32 or i64 memory type whose memory operand
+/// is at least 4 byte aligned are handled. An i32 load extended to i64 by
+/// anything other than sign extension is emitted as LDRWl wrapped in
+/// SUBREG_TO_REG.
+///
+/// \returns true if \p N was replaced, false if it was left untouched.
+bool AArch64DAGToDAGISel::tryAdrLoad(SDNode *N) {
+  // Look for a load(adr global) where the addr is 4 byte aligned
+  assert(N->getOpcode() == ISD::LOAD &&
+         "N must be a LOAD operation to call this function");
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT VT = LD->getMemoryVT();
+  EVT DstVT = N->getValueType(0);
+  if (!LD->isUnindexed())
+    return false;
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  SDValue Base = LD->getBasePtr();
+  if (Base.getNode()->getOpcode() != AArch64ISD::ADR)
+    return false;
+
+  // Operand 2 is expected to be undef here because the load is unindexed.
+  assert(N->getOperand(2).getNode()->getOpcode() == ISD::UNDEF &&
+         "Unindexed load is expected to have an undef operand 2");
+
+  MachineMemOperand *MMO = LD->getMemOperand();
+  if (MMO->getAlignment() < 4)
+    return false;
+
+  SDLoc dl(N);
+  SDValue Addr = Base.getNode()->getOperand(0);
+  SDValue Ops[] = { Addr, LD->getChain() };
+
+  unsigned Opcode = 0;
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  bool InsertTo64 = false;
+  if (VT == MVT::i64)
+    Opcode = AArch64::LDRXl;
+  else if (ExtType == ISD::NON_EXTLOAD)
+    Opcode = AArch64::LDRWl;
+  else if (ExtType == ISD::SEXTLOAD)
+    Opcode = AArch64::LDRSWl;
+  else {
+    Opcode = AArch64::LDRWl;
+    InsertTo64 = true;
+    // The result of the load is only i32. It's the subreg_to_reg that makes
+    // it into an i64.
+    DstVT = MVT::i32;
+  }
+
+  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, DstVT, MVT::Other, Ops);
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MMO});
+
+  SDValue LoadedVal = SDValue(Res, 0);
+  if (InsertTo64) {
+    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
+    LoadedVal =
+        SDValue(CurDAG->getMachineNode(
+                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
+                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
+                    SubReg),
+                0);
+  }
+
+  ReplaceUses(SDValue(N, 0), LoadedVal);
+  ReplaceUses(SDValue(N, 1), SDValue(Res, 1));
+  CurDAG->RemoveDeadNode(N);
+  return true;
+}
+
 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   if (LD->isUnindexed())
@@ -2793,6 +2869,9 @@
     break;
 
   case ISD::LOAD: {
+    // Try to match load (adr global) -> ldr global
+    if (tryAdrLoad(Node))
+      return;
     // Try to select as an indexed load. Fall through to normal processing
     // if we can't.
     if (tryIndexedLoad(Node))
Index: test/CodeGen/AArch64/code-model-tiny-abs.ll
===================================================================
--- test/CodeGen/AArch64/code-model-tiny-abs.ll
+++ test/CodeGen/AArch64/code-model-tiny-abs.ll
@@ -33,16 +33,14 @@
 ; CHECK-LABEL: global_i32:
   %val = load i32, i32* @var32
   ret i32 %val
-; CHECK: adr x[[ADDR_REG:[0-9]+]], var32
-; CHECK: ldr w0, [x[[ADDR_REG]]]
+; CHECK: ldr w0, var32
 }
 
 define i64 @global_i64() {
 ; CHECK-LABEL: global_i64:
   %val = load i64, i64* @var64
   ret i64 %val
-; CHECK: adr x[[ADDR_REG:[0-9]+]], var64
-; CHECK: ldr x0, [x[[ADDR_REG]]]
+; CHECK: ldr x0, var64
 }
 
 define <2 x i64> @constpool() {
Index: test/CodeGen/AArch64/ldradr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ldradr.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-eabi -code-model=tiny -verify-machineinstrs | FileCheck %s
+
+%struct.T = type <{ i32, i64, i8, i32 }>
+
+@ptr = external local_unnamed_addr global i32*, align 8
+@ch = external local_unnamed_addr global i32, align 4
+@t = external local_unnamed_addr global %struct.T, align 4
+@t2 = external local_unnamed_addr global %struct.T, align 2
+
+define i32 @barp() {
+; CHECK-LABEL: barp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, ptr
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32*, i32** @ptr, align 8
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+}
+
+define i32 @barch() {
+; CHECK-LABEL: barch:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w0, ch
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* @ch, align 4
+  ret i32 %0
+}
+
+define i32 @barta() {
+; CHECK-LABEL: barta:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w0, t
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.T, %struct.T* @t, i64 0, i32 0), align 4
+  ret i32 %0
+}
+
+define i64 @bartb() {
+; CHECK-LABEL: bartb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x0, t+4
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i64, i64* getelementptr inbounds (%struct.T, %struct.T* @t, i64 0, i32 1), align 8
+  ret i64 %0
+}
+
+define i32 @bartc() {
+; CHECK-LABEL: bartc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adr x8, t+13
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.T, %struct.T* @t, i64 0, i32 3), align 1
+  ret i32 %0
+}
+
+define i32 @bart2a() {
+; CHECK-LABEL: bart2a:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adr x8, t2
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.T, %struct.T* @t2, i64 0, i32 0), align 2
+  ret i32 %0
+}
+
+define i64 @zextload() {
+; CHECK-LABEL: zextload:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w0, ch
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* @ch, align 4
+  %1 = zext i32 %0 to i64
+  ret i64 %1
+}
+
+define i64 @sextload() {
+; CHECK-LABEL: sextload:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x0, ch
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* @ch, align 4
+  %1 = sext i32 %0 to i64
+  ret i64 %1
+}
Index: test/CodeGen/AArch64/tiny_model.ll
===================================================================
--- test/CodeGen/AArch64/tiny_model.ll
+++ test/CodeGen/AArch64/tiny_model.ll
@@ -89,9 +89,8 @@
 ; CHECK-LABEL: foo3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adr x8, src
-; CHECK-NEXT:    adr x9, ptr
 ; CHECK-NEXT:    ldrb w8, [x8]
-; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    ldr x9, ptr
 ; CHECK-NEXT:    strb w8, [x9]
 ; CHECK-NEXT:    ret
 ;
@@ -209,9 +208,8 @@
 ; CHECK-LABEL: bar3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adr x8, lsrc
-; CHECK-NEXT:    adr x9, lptr
 ; CHECK-NEXT:    ldrb w8, [x8]
-; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    ldr x9, lptr
 ; CHECK-NEXT:    strb w8, [x9]
 ; CHECK-NEXT:    ret
 ;
@@ -227,9 +225,8 @@
 ; CHECK-PIC-LABEL: bar3:
 ; CHECK-PIC:       // %bb.0: // %entry
 ; CHECK-PIC-NEXT:    adr x8, lsrc
-; CHECK-PIC-NEXT:    adr x9, lptr
 ; CHECK-PIC-NEXT:    ldrb w8, [x8]
-; CHECK-PIC-NEXT:    ldr x9, [x9]
+; CHECK-PIC-NEXT:    ldr x9, lptr
 ; CHECK-PIC-NEXT:    strb w8, [x9]
 ; CHECK-PIC-NEXT:    ret
 ;
@@ -329,9 +326,8 @@
 ; CHECK-LABEL: baz3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adr x8, lbsrc
-; CHECK-NEXT:    adr x9, lptr
 ; CHECK-NEXT:    ldrb w8, [x8]
-; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    ldr x9, lptr
 ; CHECK-NEXT:    strb w8, [x9]
 ; CHECK-NEXT:    ret
 ;
@@ -347,9 +343,8 @@
 ; CHECK-PIC-LABEL: baz3:
 ; CHECK-PIC:       // %bb.0: // %entry
 ; CHECK-PIC-NEXT:    adr x8, lbsrc
-; CHECK-PIC-NEXT:    adr x9, lptr
 ; CHECK-PIC-NEXT:    ldrb w8, [x8]
-; CHECK-PIC-NEXT:    ldr x9, [x9]
+; CHECK-PIC-NEXT:    ldr x9, lptr
 ; CHECK-PIC-NEXT:    strb w8, [x9]
 ; CHECK-PIC-NEXT:    ret
 ;