diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -721,8 +721,7 @@
   setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::CONCAT_VECTORS);
   setTargetDAGCombine(ISD::STORE);
-  if (Subtarget->supportsAddressTopByteIgnored())
-    setTargetDAGCombine(ISD::LOAD);
+  setTargetDAGCombine(ISD::LOAD);
 
   setTargetDAGCombine(ISD::MUL);
 
@@ -12945,6 +12944,84 @@
   return false;
 }
 
+/// Returns the high part of \p Offset that is worth adding to the base with a
+/// separate "add ..., #imm12, lsl #12", or 0 to leave the offset unchanged.
+/// \p Size is the access size in bytes.
+static int64_t anchorOffset(int64_t Offset, unsigned Size) {
+  if (isInt<9>(Offset))
+    return 0;
+  const unsigned Scale = countTrailingZeros(Size);
+  if (isUIntN(Scale + 12, Offset) && (Offset == ((Offset >> Scale) << Scale)))
+    return 0;
+  // Ignore constants that can be defined in one instruction.
+  if (Offset <= 65535)
+    return 0;
+  // Mask in 64 bits so that offsets wider than 32 bits are rejected below
+  // instead of being silently truncated.
+  const int64_t HighPart = Offset & ~int64_t(0xfff);
+  return isUInt<12>(HighPart >> 12) ? HighPart : 0;
+}
+
+/// After DAG legalization, rewrites the address (add Base, C) of the load or
+/// store \p N as (add (add Base, Hi), Lo), where Hi = anchorOffset(C, Size) and
+/// Lo = C - Hi. Returns true if the rewrite was performed.
+static bool legalizedAddress(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                             SelectionDAG &DAG,
+                             const AArch64Subtarget *Subtarget) {
+  if (!DCI.isAfterLegalizeDAG())
+    return false;
+  const bool IsStore = N->getOpcode() == ISD::STORE;
+  const unsigned AddrOpIdx = (IsStore ? 2 : 1);
+  SDValue Addr = N->getOperand(AddrOpIdx);
+  if (Addr.getOpcode() != ISD::ADD)
+    return false;
+
+  auto *C = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+  if (!C)
+    return false;
+
+  // The callers only pass ISD::LOAD / ISD::STORE nodes, so use the asserting
+  // cast<> instead of dereferencing an unchecked dyn_cast<>.
+  const unsigned Size = cast<MemSDNode>(N)->getMemOperand()->getSize();
+  const int64_t Offset = C->getSExtValue();
+  const int64_t BaseOffset = anchorOffset(Offset, Size);
+  if (BaseOffset == 0)
+    return false;
+
+  SDLoc DL(N);
+  const EVT AddrVT = Addr.getValueType();
+  const EVT OffVT = C->getValueType(0);
+  SDValue BaseOffVal = DAG.getConstant(BaseOffset, DL, OffVT);
+  BaseOffVal = DAG.getSExtOrTrunc(BaseOffVal, DL, AddrVT);
+  SDValue NewAdd =
+      DAG.getNode(ISD::ADD, DL, AddrVT, Addr.getOperand(0), BaseOffVal);
+  SDValue IndexVal = DAG.getConstant(Offset - BaseOffset, DL, OffVT);
+  IndexVal = DAG.getSExtOrTrunc(IndexVal, DL, AddrVT);
+  NewAdd = DAG.getNode(ISD::ADD, DL, AddrVT, NewAdd, IndexVal);
+  if (IsStore)
+    DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), NewAdd,
+                           N->getOperand(3));
+  else
+    DAG.UpdateNodeOperands(N, N->getOperand(0), NewAdd, N->getOperand(2));
+  return true;
+}
+
+/// Target DAG combine for ISD::LOAD: first tries to split a large constant
+/// address offset, then the top-byte-ignore simplification if supported.
+static SDValue performLOADCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  if (legalizedAddress(N, DCI, DAG, Subtarget))
+    return SDValue();
+
+  if (Subtarget->supportsAddressTopByteIgnored() &&
+      performTBISimplification(N->getOperand(1), DCI, DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue performSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
@@ -12952,6 +13029,9 @@
   if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
     return Split;
 
+  if (legalizedAddress(N, DCI, DAG, Subtarget))
+    return SDValue();
+
   if (Subtarget->supportsAddressTopByteIgnored() &&
       performTBISimplification(N->getOperand(2), DCI, DAG))
     return SDValue(N, 0);
@@ -12959,7 +13039,6 @@
   return SDValue();
 }
 
-
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
 static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -14146,9 +14225,7 @@
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
   case ISD::LOAD:
-    if (performTBISimplification(N->getOperand(1), DCI, DAG))
-      return SDValue(N, 0);
-    break;
+    return performLOADCombine(N, DCI, DAG, Subtarget);
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:
diff --git a/llvm/test/CodeGen/AArch64/ldst-large-offset.ll b/llvm/test/CodeGen/AArch64/ldst-large-offset.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-large-offset.ll
@@ -0,0 +1,57 @@
+; RUN: llc %s -o - | FileCheck %s
+
+; Check that a large constant offset (23509 * 4 = 94036 = (22 << 12) + 3924) is
+; split into "add #22, lsl #12" plus an immediate load/store offset of 3924.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-android"
+
+; Function Attrs: norecurse nounwind readonly
+define hidden i32 @_Z10LoadObjectPi(i32* nocapture readonly %a) local_unnamed_addr #0 {
+entry:
+  ; CHECK-LABEL: _Z10LoadObjectPi:
+  ; CHECK: add x{{[0-9]+}}, x0, #22, lsl #12
+  ; CHECK-NEXT: ldr w0, [x{{[0-9]+}}, #3924]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 23509
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
+  ret i32 %0
+}
+
+; Function Attrs: nofree norecurse nounwind writeonly
+define hidden void @_Z11StoreObjectPii(i32* nocapture %a, i32 %v) local_unnamed_addr #1 {
+entry:
+  ; CHECK-LABEL: _Z11StoreObjectPii:
+  ; CHECK: add x{{[0-9]+}}, x0, #22, lsl #12
+  ; CHECK-NEXT: str w1, [x{{[0-9]+}}, #3924]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 23509
+  store i32 %v, i32* %arrayidx, align 4, !tbaa !3
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define hidden i32 @_Z13LoadThenStorePi(i32* nocapture %a) local_unnamed_addr #2 {
+entry:
+  ; CHECK-LABEL: _Z13LoadThenStorePi:
+  ; CHECK: add x{{[0-9]+}}, x0, #22, lsl #12
+  ; CHECK-NEXT: ldr w0, [x{{[0-9]+}}, #3924]
+  ; CHECK-NEXT: str wzr, [x{{[0-9]+}}, #3924]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 23509
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
+  store i32 0, i32* %arrayidx, align 4, !tbaa !3
+  ret i32 %0
+}
+
+attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 9.0.0 (tags/RELEASE_900/final 375507)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C++ TBAA"}