Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -9995,6 +9995,75 @@
   return DAG.getNode(ExtOp, dl, VT, tmp);
 }
 
+/// Try to fold add(GlobalAddress, Constant) into a single constant-pool entry
+/// holding the address GV+Offset, returning a load of that entry.
+///
+/// Returns an empty SDValue (no combine) unless all of the following hold:
+/// N0 is a global address node (not GlobalTLSAddress or TargetGlobalAddress)
+/// of a GlobalVariable with no offset of its own and fewer than three uses,
+/// N1 is a constant of at least 128, the subtarget is Thumb1-only and does
+/// not use MOVT, and addressing is not position-independent (PIC or ROPI).
+static SDValue ADDCombineConstantGEP(SDNode *N, SDValue N0, SDValue N1,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const ARMSubtarget *Subtarget) {
+  SDLoc dl(N);
+  GlobalAddressSDNode *GVNode = dyn_cast<GlobalAddressSDNode>(N0);
+  ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(N1);
+  SelectionDAG &DAG = DCI.DAG;
+
+  if (!GVNode || !OffsetNode)
+    return SDValue();
+
+  // Thumb-1 has limited range for LDR (immediate). For T2/ARM, this
+  // transformation may not help much.
+  if (!Subtarget->isThumb1Only())
+    return SDValue();
+
+  // Unsupported for Position-Independent addressing.
+  if (DAG.getTarget().isPositionIndependent() || Subtarget->isROPI())
+    return SDValue();
+
+  if (Subtarget->useMovt())
+    return SDValue();
+
+  if (GVNode->getOffset())
+    return SDValue();
+
+  if (GVNode->getOpcode() == ISD::GlobalTLSAddress ||
+      GVNode->getOpcode() == ISD::TargetGlobalAddress)
+    return SDValue();
+
+  // Heuristic: If GV has many uses, may not be beneficial.
+  if (GVNode->use_size() >= 3)
+    return SDValue();
+
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GVNode->getGlobal());
+
+  if (!GV)
+    return SDValue();
+
+  ConstantInt *Offset =
+      const_cast<ConstantInt *>(OffsetNode->getConstantIntValue());
+
+  // Thumb1 LDR/STR (immediate) encodes a 5-bit immediate scaled by the
+  // access size, so word accesses reach at most 31 * 4 = 124 bytes. Leave
+  // anything below 128 alone; those offsets may still fit the instruction.
+  if (Offset->getSExtValue() < 128)
+    return SDValue();
+
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+  auto CharPtrTy = Type::getInt8PtrTy(*DAG.getContext());
+  auto Base = ConstantExpr::getPointerCast(
+      const_cast<GlobalVariable *>(GV), CharPtrTy);
+  auto NewAddr = ConstantExpr::getGetElementPtr(CharPtrTy->getElementType(), Base, Offset);
+  auto CPAddr = DAG.getTargetConstantPool(NewAddr, PtrVT, 4);
+  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+  return DAG.getLoad(
+      PtrVT, dl, DAG.getEntryNode(), CPAddr,
+      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+}
+
 static SDValue findMUL_LOHI(SDValue V) {
   if (V->getOpcode() == ISD::UMUL_LOHI ||
       V->getOpcode() == ISD::SMUL_LOHI)
@@ -10440,6 +10509,10 @@
   if (N0.getNode()->hasOneUse())
     if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
       return Result;
+
+  // fold add(GlobalAddress, Constant) into a single CP entry.
+  if (SDValue Result = ADDCombineConstantGEP(N, N0, N1, DCI, Subtarget))
+    return Result;
   return SDValue();
 }
 
Index: test/CodeGen/Thumb/gv-large-offset.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/gv-large-offset.ll
@@ -0,0 +1,32 @@
+;RUN: llc -mcpu=cortex-m0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m--linux-gnueabi"
+
+%struct.Str = type { [40 x i32], i8 }
+%struct.Str2 = type { [30 x i32], i32 }
+
+@gv = external global %struct.Str, align 4
+@gv2 = external global %struct.Str2, align 4
+
+define i8 @foo() {
+;CHECK-LABEL: foo
+  %ret = load i8, i8* getelementptr inbounds (%struct.Str, %struct.Str* @gv, i32 0, i32 1)
+  ret i8 %ret
+;CHECK: ldr r0, .LCPI0_0
+;CHECK-NEXT: ldrb r0, [r0]
+
+;CHECK: .LCPI0_0
+;CHECK-NEXT: .long gv+160
+}
+
+define i32 @foo2() {
+;CHECK-LABEL: foo2
+  %ret = load i32, i32* getelementptr inbounds (%struct.Str2, %struct.Str2* @gv2, i32 0, i32 1)
+  ret i32 %ret
+;CHECK: ldr r0, .LCPI1_0
+;CHECK-NEXT: ldr r0, [r0, #120]
+
+;CHECK: .LCPI1_0
+;CHECK-NEXT: .long gv2
+}
Index: test/CodeGen/Thumb/single-gv-with-multiple-offsets.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/single-gv-with-multiple-offsets.ll
@@ -0,0 +1,31 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none-unknown-musleabi"
+
+%struct.ttt = type { [36 x i32], [5 x i32] }
+
+@global.2 = external dso_local local_unnamed_addr global %struct.ttt, align 4
+
+; Check that dagcombine is not reassociating constant GEPs in store addresses.
+; CHECK: ldr r[[B:[0-9]+]], .LCPI0_1
+; CHECK: str r1, [r[[B:[0-9]+]]]
+; CHECK: str r{{[0-9]+}}, [r[[B:[0-9]+]], #4]
+; CHECK: str r{{[0-9]+}}, [r[[B:[0-9]+]], #8]
+; CHECK: str r{{[0-9]+}}, [r[[B:[0-9]+]], #12]
+; CHECK: str r{{[0-9]+}}, [r[[B:[0-9]+]], #16]
+
+define dso_local void @blam() {
+bb:
+  %tmp = load volatile i32, i32* inttoptr (i32 805874688 to i32*), align 1024
+  store i32 %tmp, i32* getelementptr inbounds (%struct.ttt, %struct.ttt* @global.2, i32 0, i32 1, i32 0), align 4
+  %tmp1 = load volatile i32, i32* inttoptr (i32 805874692 to i32*), align 4
+  store i32 %tmp1, i32* getelementptr inbounds (%struct.ttt, %struct.ttt* @global.2, i32 0, i32 1, i32 1), align 4
+  %tmp2 = load volatile i32, i32* inttoptr (i32 805874696 to i32*), align 8
+  store i32 %tmp2, i32* getelementptr inbounds (%struct.ttt, %struct.ttt* @global.2, i32 0, i32 1, i32 2), align 4
+  %tmp3 = load volatile i32, i32* inttoptr (i32 805874700 to i32*), align 4
+  store i32 %tmp3, i32* getelementptr inbounds (%struct.ttt, %struct.ttt* @global.2, i32 0, i32 1, i32 3), align 4
+  %tmp4 = load volatile i32, i32* inttoptr (i32 805874704 to i32*), align 16
+  store i32 %tmp4, i32* getelementptr inbounds (%struct.ttt, %struct.ttt* @global.2, i32 0, i32 1, i32 4), align 4
+  ret void
+}