diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -721,8 +721,7 @@
   setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::CONCAT_VECTORS);
   setTargetDAGCombine(ISD::STORE);
-  if (Subtarget->supportsAddressTopByteIgnored())
-    setTargetDAGCombine(ISD::LOAD);
+  setTargetDAGCombine(ISD::LOAD);
 
   setTargetDAGCombine(ISD::MUL);
 
@@ -12945,6 +12944,107 @@
   return false;
 }
 
+/// Return true if \p value lies in the closed interval [\p lower, \p upper].
+/// Both sides are compared as unsigned 64-bit distances from \p lower, so a
+/// single comparison checks both bounds.
+static bool InRange(int64_t value, int64_t lower, int64_t upper) {
+  return static_cast<uint64_t>(value) - static_cast<uint64_t>(lower) <=
+         static_cast<uint64_t>(upper) - static_cast<uint64_t>(lower);
+}
+
+/// Pick the part of a constant address offset that should be added to the
+/// base register so that the remainder fits a load/store immediate field.
+///
+/// \param offset byte offset applied to the base pointer.
+/// \param size   access size in bytes; must be a power of two because it is
+///               turned into an alignment mask.
+/// \returns the anchor to fold into the base, or 0 to leave \p offset alone.
+///          The memory operation keeps the remainder, offset - anchor.
+static int64_t AnchorOffset(int64_t offset, int64_t size) {
+  // Does it look like we'll need a 16-byte load/store-pair operation? Add
+  // 0x400 and clear bits [10:4], which leaves a remainder in [-1024, 1008].
+  if (size > 16)
+    return (offset + 0x400) & ~0x7f0;
+
+  // For offsets that aren't a multiple of the access size, the limit is
+  // -256...255.
+  if (offset & (size - 1))
+    return (offset + 0x100) & ~0x1ff;
+
+  // Small negative offsets are supported.
+  if (InRange(offset, -256, 0))
+    return 0;
+
+  // Use 12-bit offset by access size: the remainder is in [0, 4095 * size].
+  return offset & (~0xfff * size);
+}
+
+/// Split a large constant offset in the address of the load or store \p N so
+/// that the memory operation keeps only an offset chosen by AnchorOffset:
+///   (add Base, C) -> (add (add Base, Anchor), C - Anchor)
+///
+/// Only runs after operation legalization and only when the address ADD has
+/// a single use. \returns true if the address node was replaced.
+static bool legalizedAddress(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                             SelectionDAG &DAG,
+                             const AArch64Subtarget *Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return false;
+
+  // The pointer is operand 2 of a store and operand 1 of a load.
+  const bool IsStore = N->getOpcode() == ISD::STORE;
+  SDValue Addr = N->getOperand(IsStore ? 2 : 1);
+  // Unable to handle other than one use.
+  if (!Addr.hasOneUse() || Addr.getOpcode() != ISD::ADD)
+    return false;
+
+  auto *C = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+  if (!C)
+    return false;
+
+  // The immediate range depends on the number of bytes accessed in memory,
+  // which differs from the register type for extending loads and truncating
+  // stores.
+  const EVT MemVT = cast<MemSDNode>(N)->getMemoryVT();
+  const int64_t NumBytes = MemVT.getSizeInBits() / 8;
+  // AnchorOffset builds an alignment mask from the size, so only non-zero
+  // powers of two are handled.
+  if (NumBytes <= 0 || (NumBytes & (NumBytes - 1)) != 0)
+    return false;
+
+  const int64_t Offset = C->getSExtValue();
+  const int64_t BaseOffset = AnchorOffset(Offset, NumBytes);
+  if (BaseOffset == 0)
+    return false;
+
+  // Both operands of an ADD already have the address type.
+  SDLoc dl(N);
+  const EVT PtrVT = Addr.getValueType();
+  SDValue NewAdd = DAG.getNode(ISD::ADD, dl, PtrVT, Addr.getOperand(0),
+                               DAG.getConstant(BaseOffset, dl, PtrVT));
+  NewAdd = DAG.getNode(ISD::ADD, dl, PtrVT, NewAdd,
+                       DAG.getConstant(Offset - BaseOffset, dl, PtrVT));
+  DCI.CombineTo(Addr.getNode(), NewAdd, false);
+  return true;
+}
+
+/// Target-specific DAG combine for ISD::LOAD: first try to split a large
+/// constant address offset, then, on subtargets that ignore the top address
+/// byte, try the TBI simplification of the pointer operand.
+static SDValue performLOADCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  if (legalizedAddress(N, DCI, DAG, Subtarget))
+    return SDValue();
+
+  if (Subtarget->supportsAddressTopByteIgnored() &&
+      performTBISimplification(N->getOperand(1), DCI, DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue performSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
@@ -12952,6 +13052,9 @@
   if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
     return Split;
 
+  if (legalizedAddress(N, DCI, DAG, Subtarget))
+    return SDValue();
+
   if (Subtarget->supportsAddressTopByteIgnored() &&
       performTBISimplification(N->getOperand(2), DCI, DAG))
     return SDValue(N, 0);
@@ -12959,7 +13062,6 @@
   return SDValue();
 }
 
-
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
 static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -14146,9 +14248,7 @@
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
   case ISD::LOAD:
-    if (performTBISimplification(N->getOperand(1), DCI, DAG))
-      return SDValue(N, 0);
-    break;
+    return performLOADCombine(N, DCI, DAG, Subtarget);
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:
diff --git a/llvm/test/CodeGen/AArch64/ldst-large-offset.ll b/llvm/test/CodeGen/AArch64/ldst-large-offset.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-large-offset.ll
@@ -0,0 +1,39 @@
+; RUN: llc %s -O2 -o - | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-android"
+
+; Function Attrs: norecurse nounwind readonly
+define hidden i32 @_Z10LoadObjectPi(i32* nocapture readonly %a) local_unnamed_addr #0 {
+entry:
+  ; CHECK-LABEL: _Z10LoadObjectPi:
+  ; CHECK: add x{{[0-9]+}}, x0, #20, lsl #12
+  ; CHECK-NEXT: ldr w0, [x{{[0-9]+}}, #12116]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 23509
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
+  ret i32 %0
+}
+
+; Function Attrs: nofree norecurse nounwind writeonly
+define hidden void @_Z11StoreObjectPii(i32* nocapture %a, i32 %v) local_unnamed_addr #1 {
+entry:
+  ; CHECK-LABEL: _Z11StoreObjectPii:
+  ; CHECK: add x{{[0-9]+}}, x0, #20, lsl #12
+  ; CHECK-NEXT: str w1, [x{{[0-9]+}}, #12116]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 23509
+  store i32 %v, i32* %arrayidx, align 4, !tbaa !3
+  ret void
+}
+
+attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 9.0.0 (tags/RELEASE_900/final 375507)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C++ TBAA"}