Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -4212,7 +4212,7 @@ // terminator. BasicBlock *Parent = BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock(); - if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad()) + if (!Parent->getTerminator()->isEHPad()) LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); } } @@ -4742,8 +4742,7 @@ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); GetElementPtrInst *GEP = LargeOffsetGEP.first; - if (GEP && GEP->getParent() != MemoryInst->getParent() && - !NewGEPBases.count(GEP)) { + if (GEP && !NewGEPBases.count(GEP)) { // If splitting the underlying data structure can reduce the offset of a // GEP, collect the GEP. Skip the GEPs that are the new bases of // previously split data structures. Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -449,6 +449,9 @@ SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); + bool reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, SDValue N0, + SDValue N1); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); @@ -991,6 +994,63 @@ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } +bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, + SDValue N0, + SDValue N1) { + // Currently this only tries to ensure we don't undo the GEP splits done by + // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this, + // we check if the following transformation would be problematic: + // (load/store (add, (add, x, offset1), offset2)) -> + // (load/store (add, x, offset1+offset2)). 
+ + if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) + return false; + + if (N0.hasOneUse()) + return false; + + auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(N1); + if (!C1 || !C2) + return false; + + const APInt &C1APIntVal = C1->getAPIntValue(); + const APInt &C2APIntVal = C2->getAPIntValue(); + if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) + return false; + + const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; + if (CombinedValueIntVal.getBitWidth() > 64) + return false; + const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); + + for (SDNode *Node : N0->uses()) { + auto *LD = dyn_cast<LoadSDNode>(Node); + auto *ST = dyn_cast<StoreSDNode>(Node); + if (LD || ST) { + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + EVT VT = LD ? LD->getMemoryVT() : ST->getMemoryVT(); + unsigned AS = LD ? LD->getAddressSpace() : ST->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + return true; + } + } + + return false; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { // Don't reassociate reductions. 
@@ -2180,8 +2240,10 @@ return NewSel; // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) - return RADD; + if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { + if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) + return RADD; + } // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) Index: lib/Target/RISCV/RISCVISelLowering.h =================================================================== --- lib/Target/RISCV/RISCVISelLowering.h +++ lib/Target/RISCV/RISCVISelLowering.h @@ -145,6 +145,7 @@ template <class NodeTy> SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; + bool shouldConsiderGEPOffsetSplit() const override { return true; } SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; Index: test/CodeGen/ARM/misched-fusion-aes.ll =================================================================== --- test/CodeGen/ARM/misched-fusion-aes.ll +++ test/CodeGen/ARM/misched-fusion-aes.ll @@ -76,24 +76,25 @@ ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]] -; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]] -; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]] ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]] ; CHECK: 
aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} + ; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]] } @@ -165,20 +166,27 @@ ; CHECK-LABEL: aesda: ; CHECK: aesd.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]] + ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]] + +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]] -; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} + ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]] -; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} + ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]] + ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]] + +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]] + ; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QH]] @@ -207,6 +215,7 @@ ; CHECK-LABEL: aes_load_store: ; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]] + ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] } Index: test/CodeGen/ARM/vector-spilling.ll =================================================================== --- test/CodeGen/ARM/vector-spilling.ll +++ test/CodeGen/ARM/vector-spilling.ll @@ -22,8 +22,8 @@ %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3 %7 = load <8 x i64>, <8 x i64>* %6, align 8 - %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> - %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> + %8 = shufflevector <8 x 
i64> %1, <8 x i64> %3, <8 x i32> + %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9) ret void Index: test/CodeGen/RISCV/split-offsets-1.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/split-offsets-1.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I + +define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) { +; RV32I-LABEL: test1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a2, 20 +; RV32I-NEXT: addi a2, a2, -1920 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: addi a3, zero, 1 +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: addi a4, zero, 2 +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: add a0, a1, a2 +; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: ret +entry: + %s = load [65536 x i32]*, [65536 x i32]** %sp + %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000 + %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001 + %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000 + %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001 + store i32 2, i32* %gep0 + store i32 1, i32* %gep1 + store i32 1, i32* %gep2 + store i32 2, i32* %gep3 + ret void +} Index: test/CodeGen/RISCV/split-offsets-2.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/split-offsets-2.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I + +define void @test2([65536 x i32]** %sp, [65536 x 
i32]* %t, i32 %n) { +; RV32I-LABEL: test2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a3, 20 +; RV32I-NEXT: addi a3, a3, -1920 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: add a0, a0, a3 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: bge a3, a2, .LBB0_2 +; RV32I-NEXT: .LBB0_1: # %while_body +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: addi a4, a3, 1 +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a1) +; RV32I-NEXT: sw a4, 0(a1) +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: blt a3, a2, .LBB0_1 +; RV32I-NEXT: .LBB0_2: # %while_end +; RV32I-NEXT: ret +entry: + %s = load [65536 x i32]*, [65536 x i32]** %sp + br label %while_cond +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000 + %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001 + %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000 + %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end +while_body: + %i = add i32 %phi, 1 + %j = add i32 %phi, 2 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + store i32 %i, i32* %gep2 + store i32 %phi, i32* %gep3 + br label %while_cond +while_end: + ret void +}