Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -4202,15 +4202,11 @@
     if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
         (BaseI && !isa<CastInst>(BaseI) &&
          !isa<GetElementPtrInst>(BaseI))) {
-      // If the base is an instruction, make sure the GEP is not in the same
-      // basic block as the base. If the base is an argument or global
-      // value, make sure the GEP is not in the entry block. Otherwise,
-      // instruction selection can undo the split. Also make sure the
-      // parent block allows inserting non-PHI instructions before the
-      // terminator.
+      // Make sure the parent block allows inserting non-PHI instructions
+      // before the terminator.
       BasicBlock *Parent =
           BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-      if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+      if (!Parent->getTerminator()->isEHPad())
        LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
     }
   }
@@ -4740,8 +4736,7 @@
                                 InsertedInsts, PromotedInsts, TPT,
                                 LargeOffsetGEP);
   GetElementPtrInst *GEP = LargeOffsetGEP.first;
-  if (GEP && GEP->getParent() != MemoryInst->getParent() &&
-      !NewGEPBases.count(GEP)) {
+  if (GEP && !NewGEPBases.count(GEP)) {
     // If splitting the underlying data structure can reduce the offset of a
     // GEP, collect the GEP. Skip the GEPs that are the new bases of
     // previously split data structures.
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -462,6 +462,9 @@
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
+    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                    const SDLoc &DL, SDValue N0,
+                                                    SDValue N1);
     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                       SDValue N0, SDValue N1);
     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -1039,6 +1042,62 @@
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                             const SDLoc &DL,
+                                                             SDValue N0,
+                                                             SDValue N1) {
+  // Currently this only tries to ensure we don't undo the GEP splits done by
+  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+  // we check if the following transformation would be problematic:
+  // (load/store (add, (add, x, offset1), offset2)) ->
+  // (load/store (add, x, offset1+offset2)).
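+  //
+  // As a hypothetical example on a RISC-V-like target with 12-bit signed
+  // immediates: if CodeGenPrepare split off offset1 = 0x14000 so that a
+  // small offset2 = 4 could be folded into each access, folding the
+  // constants back into offset1+offset2 = 0x14004 would force every
+  // load/store to rematerialize the large constant instead of sharing one
+  // base register.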
+
+  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+    return false;
+
+  if (N0.hasOneUse())
+    return false;
+
+  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  auto *C2 = dyn_cast<ConstantSDNode>(N1);
+  if (!C1 || !C2)
+    return false;
+
+  const APInt &C1APIntVal = C1->getAPIntValue();
+  const APInt &C2APIntVal = C2->getAPIntValue();
+  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+    return false;
+
+  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+  if (CombinedValueIntVal.getBitWidth() > 64)
+    return false;
+  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+  for (SDNode *Node : N0->uses()) {
+    auto LoadStore = dyn_cast<MemSDNode>(Node);
+    if (LoadStore) {
+      // Is x[offset2] already not a legal addressing mode? If so then
+      // reassociating the constants breaks nothing (we test offset2 because
+      // that's the one we hope to fold into the load or store).
+      TargetLoweringBase::AddrMode AM;
+      AM.HasBaseReg = true;
+      AM.BaseOffs = C2APIntVal.getSExtValue();
+      EVT VT = LoadStore->getMemoryVT();
+      unsigned AS = LoadStore->getAddressSpace();
+      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        continue;
+
+      // Would x[offset1+offset2] still be a legal addressing mode?
+      AM.BaseOffs = CombinedValue;
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        return true;
+    }
+  }
+
+  return false;
+}
+
 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
@@ -2262,9 +2321,10 @@
     return NewSel;
 
   // reassociate add
-  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
-    return RADD;
-
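+  // Reassociating could merge the two constants into an offset that no
+  // longer fits the addressing mode of the loads/stores using the inner
+  // add, so only reassociate when that cannot happen.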
+  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+      return RADD;
+  }
   // fold ((0-A) + B) -> B-A
   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
Index: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
@@ -157,6 +157,7 @@
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
 
+  bool shouldConsiderGEPOffsetSplit() const override { return true; }
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI -check-prefix=CI-NOHSA %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
@@ -172,9 +172,10 @@
 ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
 ; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
 ; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; CI-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
+; CI-NOHSA-NOT: v_add
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
+; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
 ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
 
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
@@ -202,14 +203,19 @@
 ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
+; SI: s_mov_b32 {{s[0-9]+}}, 0x13480
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
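+;
+; Note: with the GEP offset split preserved, SI materializes the large base
+; 0x13480 once and folds the 16/32/48-byte element offsets into the
+; buffer_load encodings, while CI-NOHSA still materializes each combined
+; offset into an SGPR with s_mov_b32.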
 
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
Index: llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
+++ llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
@@ -76,24 +76,25 @@
 ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
 ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
 ; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
 }
@@ -165,20 +166,27 @@
 ; CHECK-LABEL: aesda:
 ; CHECK: aesd.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
+
 ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
+
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
+
 ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
+
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]]
+
 ; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QH]]
@@ -207,6 +215,7 @@
 ; CHECK-LABEL: aes_load_store:
 ; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
+
 ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
 }
Index: llvm/trunk/test/CodeGen/ARM/vector-spilling.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/vector-spilling.ll
+++ llvm/trunk/test/CodeGen/ARM/vector-spilling.ll
@@ -22,8 +22,8 @@
   %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3
   %7 = load <8 x i64>, <8 x i64>* %6, align 8
 
-  %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32>
-  %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32>
+  %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32>
+  %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32>
 
   tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
   ret void
Index: llvm/trunk/test/CodeGen/RISCV/split-offsets.ll
===================================================================
--- llvm/trunk/test/CodeGen/RISCV/split-offsets.ll
+++ llvm/trunk/test/CodeGen/RISCV/split-offsets.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+; Check that memory accesses to array elements with large offsets have those
+; offsets split into a base offset, plus a smaller offset that is folded into
+; the memory operation. We should also only compute that base offset once,
+; since it can be shared for all memory operations in this test.
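+;
+; For example, element 20000 of an i32 array sits at byte offset
+; 20000 * 4 = 80000, which does not fit in a 12-bit immediate: the checks
+; below expect the base 80000 = (20 << 12) - 1920 to be built once
+; (lui + addi/addiw) and the residual offsets 0 and 4 to be folded into the
+; store instructions.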
+define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 20
+; RV32I-NEXT:    addi a2, a2, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a2
+; RV32I-NEXT:    addi a3, zero, 1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, zero, 2
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    add a0, a1, a2
+; RV32I-NEXT:    sw a4, 4(a0)
+; RV32I-NEXT:    sw a3, 0(a0)
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test1:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lui a2, 20
+; RV64I-NEXT:    addiw a2, a2, -1920
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    addi a3, zero, 1
+; RV64I-NEXT:    sw a3, 4(a0)
+; RV64I-NEXT:    addi a4, zero, 2
+; RV64I-NEXT:    sw a4, 0(a0)
+; RV64I-NEXT:    add a0, a1, a2
+; RV64I-NEXT:    sw a4, 4(a0)
+; RV64I-NEXT:    sw a3, 0(a0)
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  store i32 2, i32* %gep0
+  store i32 1, i32* %gep1
+  store i32 1, i32* %gep2
+  store i32 2, i32* %gep3
+  ret void
+}
+
+; Ditto. Check it when the GEPs are not in the entry block.
+define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a3, 20
+; RV32I-NEXT:    addi a3, a3, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a3
+; RV32I-NEXT:    add a1, a1, a3
+; RV32I-NEXT:    mv a3, zero
+; RV32I-NEXT:    bge a3, a2, .LBB1_2
+; RV32I-NEXT:  .LBB1_1: # %while_body
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, a3, 1
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a1)
+; RV32I-NEXT:    sw a4, 0(a1)
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    blt a3, a2, .LBB1_1
+; RV32I-NEXT:  .LBB1_2: # %while_end
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lui a3, 20
+; RV64I-NEXT:    addiw a3, a3, -1920
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    add a0, a0, a3
+; RV64I-NEXT:    add a1, a1, a3
+; RV64I-NEXT:    sext.w a2, a2
+; RV64I-NEXT:    mv a3, zero
+; RV64I-NEXT:    sext.w a4, a3
+; RV64I-NEXT:    bge a4, a2, .LBB1_2
+; RV64I-NEXT:  .LBB1_1: # %while_body
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sw a3, 4(a0)
+; RV64I-NEXT:    addi a4, a3, 1
+; RV64I-NEXT:    sw a4, 0(a0)
+; RV64I-NEXT:    sw a3, 4(a1)
+; RV64I-NEXT:    sw a4, 0(a1)
+; RV64I-NEXT:    mv a3, a4
+; RV64I-NEXT:    sext.w a4, a3
+; RV64I-NEXT:    blt a4, a2, .LBB1_1
+; RV64I-NEXT:  .LBB1_2: # %while_end
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  br label %while_cond
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+while_body:
+  %i = add i32 %phi, 1
+  %j = add i32 %phi, 2
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  store i32 %i, i32* %gep2
+  store i32 %phi, i32* %gep3
+  br label %while_cond
+while_end:
+  ret void
+}
+
Index: llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll
+++ llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll
@@ -50,9 +50,8 @@
 ; Test the next doubleword up, which requires separate address logic for ALG.
 define void @f4(i128 *%aptr, i64 %base) {
 ; CHECK-LABEL: f4:
-; CHECK: lgr [[BASE:%r[1-5]]], %r3
-; CHECK: agfi [[BASE]], 524288
-; CHECK: alg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: lay [[BASE:%r[1-5]]], 524280(%r3)
+; CHECK: alg {{%r[0-5]}}, 8([[BASE]])
 ; CHECK: alcg {{%r[0-5]}}, 524280(%r3)
 ; CHECK: br %r14
 %addr = add i64 %base, 524280
@@ -65,11 +64,10 @@
 }
 
 ; Test the next doubleword after that, which requires separate logic for
-; both instructions. It would be better to create an anchor at 524288
-; that both instructions can use, but that isn't implemented yet.
+; both instructions.
 define void @f5(i128 *%aptr, i64 %base) {
 ; CHECK-LABEL: f5:
-; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: alg {{%r[0-5]}}, 8({{%r[1-5]}})
 ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
 %addr = add i64 %base, 524288
Index: llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll
+++ llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll
@@ -55,9 +55,8 @@
 ; Test the next doubleword up, which requires separate address logic for SLG.
 define void @f4(i64 %base) {
 ; CHECK-LABEL: f4:
-; CHECK: lgr [[BASE:%r[1-5]]], %r2
-; CHECK: agfi [[BASE]], 524288
-; CHECK: slg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: lay [[BASE:%r[1-5]]], 524280(%r2)
+; CHECK: slg {{%r[0-5]}}, 8([[BASE]])
 ; CHECK: slbg {{%r[0-5]}}, 524280(%r2)
 ; CHECK: br %r14
 %addr = add i64 %base, 524280
@@ -71,11 +70,10 @@
 }
 
 ; Test the next doubleword after that, which requires separate logic for
-; both instructions. It would be better to create an anchor at 524288
-; that both instructions can use, but that isn't implemented yet.
+; both instructions.
 define void @f5(i64 %base) {
 ; CHECK-LABEL: f5:
-; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: slg {{%r[0-5]}}, 8({{%r[1-5]}})
 ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
 %addr = add i64 %base, 524288