Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -4212,7 +4212,7 @@
           // terminator.
           BasicBlock *Parent =
               BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-          if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+          if (!Parent->getTerminator()->isEHPad())
             LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
         }
       }
@@ -4742,8 +4742,7 @@
         InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
 
     GetElementPtrInst *GEP = LargeOffsetGEP.first;
-    if (GEP && GEP->getParent() != MemoryInst->getParent() &&
-        !NewGEPBases.count(GEP)) {
+    if (GEP && !NewGEPBases.count(GEP)) {
       // If splitting the underlying data structure can reduce the offset of a
       // GEP, collect the GEP.  Skip the GEPs that are the new bases of
       // previously split data structures.
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -406,6 +406,10 @@
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
+    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                    const SDLoc &DL, SDValue N0,
+                                                    SDValue N1,
+                                                    SDNodeFlags Flags);
     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
@@ -946,6 +950,46 @@
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(
+    unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) {
+  if (Opc != ISD::ADD)
+    return false;
+
+  if (Flags.hasVectorReduction())
+    return false;
+
+  if (N0.getOpcode() != Opc || N0->getFlags().hasVectorReduction())
+    return false;
+
+  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N1);
+  if (!C1 || !C2)
+    return false;
+
+  const APInt &C1APIntVal = C1->getAPIntValue();
+  const APInt &C2APIntVal = C2->getAPIntValue();
+
+  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+    return false;
+
+  TargetLoweringBase::AddrMode AM;
+  AM.HasBaseReg = true;
+  AM.BaseOffs = C2APIntVal.getSExtValue();
+
+  EVT VT = N0.getValueType();
+  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+
+  // If it already is not a legal addressing mode then we break nothing
+  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, 0))
+    return false;
+
+  const APInt CombinedValue = C1APIntVal + C2APIntVal;
+  if (CombinedValue.getBitWidth() > 64)
+    return false;
+  AM.BaseOffs = CombinedValue.getSExtValue();
+  return !TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, 0);
+}
+
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                     SDValue N1, SDNodeFlags Flags) {
   // Don't reassociate reductions.
@@ -2143,8 +2187,11 @@
     return NewSel;
 
   // reassociate add
-  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
-    return RADD;
+  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1,
+                                                  N->getFlags())) {
+    if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+      return RADD;
+  }
 
   // fold ((0-A) + B) -> B-A
   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
Index: lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.h
+++ lib/Target/RISCV/RISCVISelLowering.h
@@ -141,6 +141,7 @@
                                          Type *Ty) const override {
     return true;
   }
+  bool shouldConsiderGEPOffsetSplit() const override { return true; }
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
Index: test/CodeGen/RISCV/split-offsets-1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/split-offsets-1.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+
+define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 20
+; RV32I-NEXT:    addi a2, a2, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a2
+; RV32I-NEXT:    addi a3, zero, 1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, zero, 2
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    add a0, a1, a2
+; RV32I-NEXT:    sw a4, 4(a0)
+; RV32I-NEXT:    sw a3, 0(a0)
+; RV32I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  store i32 2, i32* %gep0
+  store i32 1, i32* %gep1
+  store i32 1, i32* %gep2
+  store i32 2, i32* %gep3
+  ret void
+}
Index: test/CodeGen/RISCV/split-offsets-2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/split-offsets-2.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+
+define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a3, 20
+; RV32I-NEXT:    addi a3, a3, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a3
+; RV32I-NEXT:    add a1, a1, a3
+; RV32I-NEXT:    mv a3, zero
+; RV32I-NEXT:    bge a3, a2, .LBB0_2
+; RV32I-NEXT:  .LBB0_1: # %while_body
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, a3, 1
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a1)
+; RV32I-NEXT:    sw a4, 0(a1)
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    blt a3, a2, .LBB0_1
+; RV32I-NEXT:  .LBB0_2: # %while_end
+; RV32I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  br label %while_cond
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+while_body:
+  %i = add i32 %phi, 1
+  %j = add i32 %phi, 2
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  store i32 %i, i32* %gep2
+  store i32 %phi, i32* %gep3
+  br label %while_cond
+while_end:
+  ret void
+}