diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -84,7 +84,7 @@ "without hoisting.")); static cl::opt ConstHoistGEP( - "consthoist-gep", cl::init(false), cl::Hidden, + "consthoist-gep", cl::init(true), cl::Hidden, cl::desc("Try hoisting constant gep expressions")); static cl::opt @@ -432,6 +432,22 @@ // to be cheaper than compute it by , which can be lowered to // an ADD instruction or folded into Load/Store instruction. InstructionCost Cost = + TTI->getIntImmCostInst(Instruction::GetElementPtr, 0, Offset, OffsetTy, + TargetTransformInfo::TCK_SizeAndLatency, Inst); + if (Cost == TTI::TCC_Free) { + LLVM_DEBUG(dbgs() << "GEP is free, no need to hoist it\n"); + return; + } + + unsigned AddrSpace = GEPO->getPointerAddressSpace(); + if (TTI->isLegalAddressingMode(GEPO->getResultElementType(), + BaseGV, Offset.getLimitedValue(), + /*HasBaseReg=*/true, /*Scale=*/0, + AddrSpace)) { + LLVM_DEBUG(dbgs() << "This expr can be folded into addressing mode, so no need to hoist it\n"); + return; + } + Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy, TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; @@ -646,9 +662,19 @@ ConstInfo.BaseInt = ConstInt; ConstInfo.BaseExpr = ConstExpr; Type *Ty = ConstInt->getType(); + unsigned short BaseConstDimentions = ConstExpr ? ConstExpr->getNumOperands() : 0; // Rebase the constants with respect to the base constant. 
for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + if(ConstCand != S && ConstCand->ConstExpr) { + unsigned short RebasedConstDimentions = ConstCand->ConstExpr->getNumOperands(); + if (RebasedConstDimentions == BaseConstDimentions && RebasedConstDimentions <= 3) { + // No need to rebase this constant in terms of the base constant; + // it will not add value because the address dimension is just <= 3, + // which is not a complicated address calculation. + continue; + } + } APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue(); Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff); Type *ConstTy = @@ -857,6 +883,15 @@ unsigned ReBasesNum = 0; unsigned NotRebasedNum = 0; for (Instruction *IP : IPSet) { + if (ConstInfo.RebasedConstants.size() == 1 && + ConstInfo.RebasedConstants[0].Uses.size() == 1) { + if (IP->getParent() == ConstInfo.RebasedConstants[0].Uses[0].Inst->getParent()) { + // This means there is only a single use of this const expr, and it is NOT inside a loop. + // No need to hoist a single use. + continue; + } + } + // First, collect constants depending on this IP of the base. 
UsesNum = 0; SmallVector ToBeRebased; diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -15,11 +15,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq.w .LBB0_11 ; CHECK-NEXT: @ %bb.2: @ %for.cond1.preheader1 -; CHECK-NEXT: movw r8, :lower16:var_36 +; CHECK-NEXT: movw r0, :lower16:arr_61 +; CHECK-NEXT: movw r4, :lower16:var_36 +; CHECK-NEXT: movt r0, :upper16:arr_61 +; CHECK-NEXT: add.w r10, r0, #4 ; CHECK-NEXT: movw r0, #27476 -; CHECK-NEXT: addw r10, r2, #397 -; CHECK-NEXT: mov.w r9, #11 -; CHECK-NEXT: movt r8, :upper16:var_36 +; CHECK-NEXT: addw r9, r2, #397 +; CHECK-NEXT: mov.w r12, #22 +; CHECK-NEXT: mov.w r8, #11 +; CHECK-NEXT: movt r4, :upper16:var_36 ; CHECK-NEXT: sdiv r1, r0, r1 ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: .LBB0_3: @ %for.cond6.preheader @@ -27,108 +31,103 @@ ; CHECK-NEXT: @ Child Loop BB0_4 Depth 2 ; CHECK-NEXT: @ Child Loop BB0_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB0_8 Depth 2 -; CHECK-NEXT: movs r0, #22 -; CHECK-NEXT: dls lr, r9 -; CHECK-NEXT: mla r7, r11, r0, r10 -; CHECK-NEXT: movw r0, :lower16:arr_61 -; CHECK-NEXT: movt r0, :upper16:arr_61 -; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mla r6, r11, r12, r9 +; CHECK-NEXT: dls lr, r8 +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: .LBB0_4: @ %for.body10 ; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: str r3, [r6, #-4] -; CHECK-NEXT: add.w r12, r3, #396 -; CHECK-NEXT: ldrb r5, [r7, #-1] -; CHECK-NEXT: add.w r3, r3, #792 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: str r7, [r3, #-4] +; CHECK-NEXT: add.w r5, r7, #396 +; CHECK-NEXT: ldrb r0, [r6, #-1] +; CHECK-NEXT: add.w r7, r7, #792 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: cmp r5, 
#0 -; CHECK-NEXT: str.w r12, [r6] -; CHECK-NEXT: cset r5, ne -; CHECK-NEXT: adds r6, #8 -; CHECK-NEXT: strb.w r5, [r8] -; CHECK-NEXT: ldrb r5, [r7] -; CHECK-NEXT: adds r7, #2 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: str r5, [r3] +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: strb r0, [r4] +; CHECK-NEXT: ldrb r0, [r6] +; CHECK-NEXT: adds r6, #2 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: cset r5, ne -; CHECK-NEXT: strb.w r5, [r8] +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: strb r0, [r4] ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup9 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: add.w r3, r11, #1 -; CHECK-NEXT: movs r7, #22 -; CHECK-NEXT: dls lr, r9 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: uxtb r3, r3 -; CHECK-NEXT: smlabb r7, r3, r7, r10 -; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: add.w r0, r11, #1 +; CHECK-NEXT: dls lr, r8 +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: smlabb r6, r0, r12, r9 ; CHECK-NEXT: .LBB0_6: @ %for.body10.1 ; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: str r3, [r6, #-4] -; CHECK-NEXT: add.w r4, r3, #396 -; CHECK-NEXT: ldrb r5, [r7, #-1] -; CHECK-NEXT: add.w r3, r3, #792 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: str r7, [r3, #-4] +; CHECK-NEXT: add.w r5, r7, #396 +; CHECK-NEXT: ldrb r0, [r6, #-1] +; CHECK-NEXT: add.w r7, r7, #792 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: str r4, [r6] -; CHECK-NEXT: cset r5, ne -; CHECK-NEXT: adds r6, #8 -; CHECK-NEXT: strb.w r5, [r8] -; CHECK-NEXT: ldrb r5, [r7] -; CHECK-NEXT: adds 
r7, #2 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: str r5, [r3] +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: strb r0, [r4] +; CHECK-NEXT: ldrb r0, [r6] +; CHECK-NEXT: adds r6, #2 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: cset r5, ne -; CHECK-NEXT: strb.w r5, [r8] +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: strb r0, [r4] ; CHECK-NEXT: le lr, .LBB0_6 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup9.1 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: add.w r3, r11, #2 -; CHECK-NEXT: movs r7, #22 -; CHECK-NEXT: dls lr, r9 -; CHECK-NEXT: uxtb r3, r3 -; CHECK-NEXT: smlabb r7, r3, r7, r10 +; CHECK-NEXT: add.w r0, r11, #2 +; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: mov r7, r10 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: smlabb r6, r0, r12, r9 ; CHECK-NEXT: .LBB0_8: @ %for.body10.2 ; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: str r3, [r0, #-4] -; CHECK-NEXT: ldrb r6, [r7, #-1] -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: add.w r6, r3, #396 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: str r6, [r0] -; CHECK-NEXT: cset r6, ne -; CHECK-NEXT: strb.w r6, [r8] +; CHECK-NEXT: str r3, [r7, #-4] +; CHECK-NEXT: add.w r5, r3, #396 +; CHECK-NEXT: ldrb r0, [r6, #-1] ; CHECK-NEXT: add.w r3, r3, #792 -; CHECK-NEXT: ldrb r6, [r7] -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: adds r7, #2 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: str r5, [r7] +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: adds r7, #8 +; CHECK-NEXT: strb r0, [r4] +; 
CHECK-NEXT: ldrb r0, [r6] +; CHECK-NEXT: adds r6, #2 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne -; CHECK-NEXT: sxthne r5, r1 -; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: cset r6, ne -; CHECK-NEXT: strb.w r6, [r8] +; CHECK-NEXT: sxthne r0, r1 +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: strb r0, [r4] ; CHECK-NEXT: le lr, .LBB0_8 ; CHECK-NEXT: @ %bb.9: @ %for.cond.cleanup9.2 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll --- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -528,239 +528,238 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov.w r1, #11 ; CHECK-NEXT: cinc r1, r1, ne ; CHECK-NEXT: movs r0, #38 ; CHECK-NEXT: mul r2, r1, r0 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: movw r0, :lower16:arr_22 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: movt r0, :upper16:arr_22 ; CHECK-NEXT: add.w r1, r2, #15 ; CHECK-NEXT: lsrs r3, r1, #4 -; CHECK-NEXT: strd r3, r2, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: strd r3, r2, [sp, #8] @ 8-byte Folded Spill ; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_2 ; CHECK-NEXT: .LBB19_1: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vstrb.8 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB19_1 ; CHECK-NEXT: .LBB19_2: @ %entry -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movw r6, :lower16:arr_20 -; CHECK-NEXT: movt r6, :upper16:arr_20 -; CHECK-NEXT: add.w r3, r6, #80 +; CHECK-NEXT: movw r2, :lower16:arr_21 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: movt r2, :upper16:arr_21 +; CHECK-NEXT: 
movw r3, :lower16:arr_20 +; CHECK-NEXT: adds r2, #36 +; CHECK-NEXT: movt r3, :upper16:arr_20 ; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: movw r0, :lower16:arr_21 -; CHECK-NEXT: movt r0, :upper16:arr_21 -; CHECK-NEXT: add.w r5, r0, #36 -; CHECK-NEXT: add.w r11, r6, #128 -; CHECK-NEXT: add.w r7, r6, #112 -; CHECK-NEXT: add.w r2, r6, #96 -; CHECK-NEXT: add.w r4, r6, #64 -; CHECK-NEXT: add.w r0, r6, #48 -; CHECK-NEXT: add.w r1, r6, #32 -; CHECK-NEXT: add.w r12, r6, #16 -; CHECK-NEXT: adr r6, .LCPI19_0 -; CHECK-NEXT: vldrw.u32 q0, [r6] -; CHECK-NEXT: movw r6, :lower16:arr_20 -; CHECK-NEXT: mov.w r8, #327685 -; CHECK-NEXT: mov.w r9, #5 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: adr r2, .LCPI19_0 +; CHECK-NEXT: add.w r10, r3, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r11, r3, #112 +; CHECK-NEXT: add.w r4, r3, #96 +; CHECK-NEXT: add.w r6, r3, #80 +; CHECK-NEXT: add.w r7, r3, #64 +; CHECK-NEXT: add.w r5, r3, #48 +; CHECK-NEXT: add.w r0, r3, #32 +; CHECK-NEXT: add.w r1, r3, #16 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov.w r12, #327685 +; CHECK-NEXT: mov.w r8, #5 ; CHECK-NEXT: vmov.i16 q1, #0x5 -; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: movt r6, :upper16:arr_20 +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: .LBB19_3: @ %for.cond8.preheader ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str r8, [r5, #-4] -; CHECK-NEXT: vstrh.16 q1, [r5, #-36] -; CHECK-NEXT: strh.w r9, [r5] -; CHECK-NEXT: vstrh.16 q1, [r5, #-20] -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: vstrh.16 q0, [r12], #152 -; CHECK-NEXT: vstrh.16 q0, [r6], #152 +; CHECK-NEXT: str r12, [r3, #-4] +; CHECK-NEXT: vstrh.16 q1, [r3, #-36] +; CHECK-NEXT: strh.w r8, [r3] +; CHECK-NEXT: vstrh.16 q1, [r3, #-20] +; CHECK-NEXT: vstrw.32 q0, [r6] ; CHECK-NEXT: vstrh.16 q0, [r1], #152 -; CHECK-NEXT: vstrh.16 q0, [r0], #152 -; CHECK-NEXT: vstrh.16 q0, [r4], #152 ; CHECK-NEXT: vstrh.16 q0, [r2], #152 +; CHECK-NEXT: vstrh.16 q0, [r0], 
#152 +; CHECK-NEXT: vstrh.16 q0, [r5], #152 ; CHECK-NEXT: vstrh.16 q0, [r7], #152 +; CHECK-NEXT: vstrh.16 q0, [r4], #152 ; CHECK-NEXT: vstrh.16 q0, [r11], #152 -; CHECK-NEXT: strd r9, r10, [r3, #64] -; CHECK-NEXT: adds r5, #38 -; CHECK-NEXT: adds r3, #152 +; CHECK-NEXT: vstrh.16 q0, [r10], #152 +; CHECK-NEXT: strd r8, r9, [r6, #64] +; CHECK-NEXT: adds r3, #38 +; CHECK-NEXT: adds r6, #152 ; CHECK-NEXT: le lr, .LBB19_3 ; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup6 ; CHECK-NEXT: movw r0, :lower16:arr_22 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: movt r0, :upper16:arr_22 -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: add.w r0, r0, #1824 ; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_6 ; CHECK-NEXT: .LBB19_5: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vstrb.8 q1, [r0], #16 ; CHECK-NEXT: letp lr, .LBB19_5 ; CHECK-NEXT: .LBB19_6: @ %for.cond.cleanup6 -; CHECK-NEXT: movw r6, :lower16:arr_20 -; CHECK-NEXT: movw r0, #7376 -; CHECK-NEXT: movt r6, :upper16:arr_20 -; CHECK-NEXT: adds r3, r6, r0 +; CHECK-NEXT: movw r2, :lower16:arr_20 ; CHECK-NEXT: movw r0, #7408 -; CHECK-NEXT: add.w r12, r6, r0 +; CHECK-NEXT: movt r2, :upper16:arr_20 +; CHECK-NEXT: add.w r11, r2, r0 +; CHECK-NEXT: movw r0, #7376 +; CHECK-NEXT: adds r6, r2, r0 ; CHECK-NEXT: movw r0, #7344 -; CHECK-NEXT: add.w r9, r6, r0 +; CHECK-NEXT: adds r7, r2, r0 ; CHECK-NEXT: movw r0, #7312 -; CHECK-NEXT: adds r2, r6, r0 -; CHECK-NEXT: movw r0, :lower16:arr_21 -; CHECK-NEXT: add.w r1, r6, #7424 -; CHECK-NEXT: add.w r7, r6, #7392 -; CHECK-NEXT: add.w r4, r6, #7360 -; CHECK-NEXT: add.w r5, r6, #7328 -; CHECK-NEXT: add.w r8, r6, #7296 -; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movt r0, :upper16:arr_21 -; CHECK-NEXT: addw r0, r0, #1860 -; CHECK-NEXT: mov.w r10, #5 -; CHECK-NEXT: dls lr, r6 -; CHECK-NEXT: mov.w 
r6, #327685 +; CHECK-NEXT: add.w r5, r2, #7424 +; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: add.w r3, r2, #7392 +; CHECK-NEXT: add.w r12, r2, #7360 +; CHECK-NEXT: add.w r4, r2, #7328 +; CHECK-NEXT: add.w r2, r2, #7296 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov.w r9, #327685 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov.w r8, #5 +; CHECK-NEXT: dls lr, r2 ; CHECK-NEXT: vmov.i16 q1, #0x5 -; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: add.w r1, r1, #1824 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: .LBB19_7: @ %for.cond8.preheader.1 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str r6, [r0, #-4] -; CHECK-NEXT: vstrh.16 q1, [r0, #-36] -; CHECK-NEXT: strh.w r10, [r0] -; CHECK-NEXT: vstrh.16 q1, [r0, #-20] -; CHECK-NEXT: vstrw.32 q0, [r3] +; CHECK-NEXT: str r9, [r1, #-4] +; CHECK-NEXT: vstrh.16 q1, [r1, #-36] +; CHECK-NEXT: strh.w r8, [r1] +; CHECK-NEXT: vstrh.16 q1, [r1, #-20] +; CHECK-NEXT: vstrw.32 q0, [r6] +; CHECK-NEXT: vstrh.16 q0, [r0], #152 ; CHECK-NEXT: vstrh.16 q0, [r2], #152 -; CHECK-NEXT: vstrh.16 q0, [r8], #152 -; CHECK-NEXT: vstrh.16 q0, [r5], #152 -; CHECK-NEXT: vstrh.16 q0, [r9], #152 ; CHECK-NEXT: vstrh.16 q0, [r4], #152 ; CHECK-NEXT: vstrh.16 q0, [r7], #152 ; CHECK-NEXT: vstrh.16 q0, [r12], #152 -; CHECK-NEXT: vstrh.16 q0, [r1], #152 -; CHECK-NEXT: strd r10, r11, [r3, #64] -; CHECK-NEXT: adds r0, #38 -; CHECK-NEXT: adds r3, #152 +; CHECK-NEXT: vstrh.16 q0, [r3], #152 +; CHECK-NEXT: vstrh.16 q0, [r11], #152 +; CHECK-NEXT: vstrh.16 q0, [r5], #152 +; CHECK-NEXT: strd r8, r10, [r6, #64] +; CHECK-NEXT: adds r1, #38 +; CHECK-NEXT: adds r6, #152 ; CHECK-NEXT: le lr, .LBB19_7 ; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup6.1 -; CHECK-NEXT: movw r0, :lower16:arr_22 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: movt r0, :upper16:arr_22 -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: add.w r0, r0, 
#3648 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: add.w r0, r0, #1824 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_10 ; CHECK-NEXT: .LBB19_9: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vstrb.8 q1, [r0], #16 ; CHECK-NEXT: letp lr, .LBB19_9 ; CHECK-NEXT: .LBB19_10: @ %for.cond.cleanup6.1 -; CHECK-NEXT: movw r7, :lower16:arr_20 -; CHECK-NEXT: movw r0, #14672 -; CHECK-NEXT: movt r7, :upper16:arr_20 -; CHECK-NEXT: adds r3, r7, r0 +; CHECK-NEXT: movw r2, :lower16:arr_20 ; CHECK-NEXT: movw r0, #14704 -; CHECK-NEXT: add.w r12, r7, r0 +; CHECK-NEXT: movt r2, :upper16:arr_20 +; CHECK-NEXT: add.w r12, r2, r0 ; CHECK-NEXT: movw r0, #14688 -; CHECK-NEXT: add.w r8, r7, r0 +; CHECK-NEXT: add.w r9, r2, r0 +; CHECK-NEXT: movw r0, #14672 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload +; CHECK-NEXT: adds r7, r2, r0 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: movw r0, #14640 -; CHECK-NEXT: add.w r9, r7, r0 +; CHECK-NEXT: movw r1, #14608 +; CHECK-NEXT: add.w r8, r2, r0 ; CHECK-NEXT: movw r0, #14624 -; CHECK-NEXT: adds r2, r7, r0 -; CHECK-NEXT: movw r0, #14608 -; CHECK-NEXT: movw r1, :lower16:arr_21 -; CHECK-NEXT: add r0, r7 -; CHECK-NEXT: add.w r4, r7, #14720 -; CHECK-NEXT: add.w r5, r7, #14656 -; CHECK-NEXT: add.w r6, r7, #14592 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movt r1, :upper16:arr_21 -; CHECK-NEXT: addw r1, r1, #3684 +; CHECK-NEXT: dls lr, r5 +; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: add r1, r2 +; CHECK-NEXT: add.w r4, r2, #14720 +; CHECK-NEXT: add.w r3, r3, #3648 +; CHECK-NEXT: add.w r2, r2, #14656 +; CHECK-NEXT: add.w r6, r6, #7296 +; CHECK-NEXT: mov.w r5, #327685 ; CHECK-NEXT: mov.w r10, #5 -; CHECK-NEXT: dls lr, r7 -; CHECK-NEXT: mov.w r7, #327685 ; CHECK-NEXT: vmov.i16 q1, #0x5 ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: .LBB19_11: @ 
%for.cond8.preheader.2 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str r7, [r1, #-4] -; CHECK-NEXT: vstrh.16 q1, [r1, #-36] -; CHECK-NEXT: strh.w r10, [r1] -; CHECK-NEXT: vstrh.16 q1, [r1, #-20] -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: vstrh.16 q0, [r0], #152 +; CHECK-NEXT: str r5, [r3, #-4] +; CHECK-NEXT: vstrh.16 q1, [r3, #-36] +; CHECK-NEXT: strh.w r10, [r3] +; CHECK-NEXT: vstrh.16 q1, [r3, #-20] +; CHECK-NEXT: vstrw.32 q0, [r7] +; CHECK-NEXT: vstrh.16 q0, [r1], #152 ; CHECK-NEXT: vstrh.16 q0, [r6], #152 +; CHECK-NEXT: vstrh.16 q0, [r0], #152 +; CHECK-NEXT: vstrh.16 q0, [r8], #152 ; CHECK-NEXT: vstrh.16 q0, [r2], #152 ; CHECK-NEXT: vstrh.16 q0, [r9], #152 -; CHECK-NEXT: vstrh.16 q0, [r5], #152 -; CHECK-NEXT: vstrh.16 q0, [r8], #152 ; CHECK-NEXT: vstrh.16 q0, [r12], #152 ; CHECK-NEXT: vstrh.16 q0, [r4], #152 -; CHECK-NEXT: strd r10, r11, [r3, #64] -; CHECK-NEXT: adds r1, #38 -; CHECK-NEXT: adds r3, #152 +; CHECK-NEXT: strd r10, r11, [r7, #64] +; CHECK-NEXT: adds r3, #38 +; CHECK-NEXT: adds r7, #152 ; CHECK-NEXT: le lr, .LBB19_11 ; CHECK-NEXT: @ %bb.12: @ %for.cond.cleanup6.2 -; CHECK-NEXT: movw r0, :lower16:arr_22 -; CHECK-NEXT: ldrd r2, r1, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: movt r0, :upper16:arr_22 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: add.w r0, r0, #5472 +; CHECK-NEXT: ldrd r2, r1, [sp, #8] @ 8-byte Folded Reload +; CHECK-NEXT: add.w r0, r0, #3648 ; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_14 ; CHECK-NEXT: .LBB19_13: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vstrb.8 q1, [r0], #16 ; CHECK-NEXT: letp lr, .LBB19_13 ; CHECK-NEXT: .LBB19_14: @ %for.cond.cleanup6.2 -; CHECK-NEXT: movw r2, :lower16:arr_21 -; CHECK-NEXT: movw r1, #5508 -; CHECK-NEXT: movt r2, :upper16:arr_21 -; CHECK-NEXT: movw r7, :lower16:arr_20 -; CHECK-NEXT: add r2, r1 -; CHECK-NEXT: movw r1, #22000 -; CHECK-NEXT: movt r7, :upper16:arr_20 -; CHECK-NEXT: add.w r12, r7, r1 -; CHECK-NEXT: movw r1, #21984 -; 
CHECK-NEXT: add.w r8, r7, r1 -; CHECK-NEXT: movw r1, #21952 -; CHECK-NEXT: add.w r9, r7, r1 -; CHECK-NEXT: movw r1, #21936 +; CHECK-NEXT: movw r2, :lower16:arr_20 +; CHECK-NEXT: movw r0, #22000 +; CHECK-NEXT: movt r2, :upper16:arr_20 +; CHECK-NEXT: add.w r12, r2, r0 +; CHECK-NEXT: movw r0, #21984 +; CHECK-NEXT: add.w r8, r2, r0 ; CHECK-NEXT: movw r0, #21968 -; CHECK-NEXT: adds r5, r7, r1 -; CHECK-NEXT: movw r1, #21920 -; CHECK-NEXT: movw r3, #21904 -; CHECK-NEXT: adds r4, r7, r3 -; CHECK-NEXT: add r0, r7 -; CHECK-NEXT: add r1, r7 -; CHECK-NEXT: add.w r3, r7, #22016 -; CHECK-NEXT: add.w r6, r7, #21888 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds r3, r2, r0 +; CHECK-NEXT: movw r0, #21952 +; CHECK-NEXT: add.w r9, r2, r0 +; CHECK-NEXT: movw r0, #21936 +; CHECK-NEXT: adds r6, r2, r0 +; CHECK-NEXT: movw r0, #21920 +; CHECK-NEXT: adds r1, r2, r0 +; CHECK-NEXT: movw r0, #21904 +; CHECK-NEXT: adds r5, r2, r0 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov.w r10, #5 +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmov.i16 q1, #0x5 -; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: add.w r4, r0, #5472 +; CHECK-NEXT: add.w r0, r2, #22016 ; CHECK-NEXT: dls lr, r7 +; CHECK-NEXT: add.w r2, r2, #21888 ; CHECK-NEXT: mov.w r7, #327685 +; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: .LBB19_15: @ %for.cond8.preheader.3 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str r7, [r2, #-4] -; CHECK-NEXT: vstrh.16 q1, [r2, #-36] -; CHECK-NEXT: strh.w r10, [r2] -; CHECK-NEXT: vstrh.16 q1, [r2, #-20] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r4], #152 -; CHECK-NEXT: vstrh.16 q0, [r6], #152 -; CHECK-NEXT: vstrh.16 q0, [r1], #152 +; CHECK-NEXT: str r7, [r4, #-4] +; CHECK-NEXT: vstrh.16 q1, [r4, #-36] +; CHECK-NEXT: strh.w r10, [r4] +; CHECK-NEXT: vstrh.16 q1, [r4, #-20] +; CHECK-NEXT: vstrw.32 q0, [r3] ; CHECK-NEXT: vstrh.16 q0, [r5], #152 +; CHECK-NEXT: vstrh.16 q0, [r2], #152 +; CHECK-NEXT: vstrh.16 q0, [r1], 
#152 +; CHECK-NEXT: vstrh.16 q0, [r6], #152 ; CHECK-NEXT: vstrh.16 q0, [r9], #152 ; CHECK-NEXT: vstrh.16 q0, [r8], #152 ; CHECK-NEXT: vstrh.16 q0, [r12], #152 -; CHECK-NEXT: vstrh.16 q0, [r3], #152 -; CHECK-NEXT: strd r10, r11, [r0, #64] -; CHECK-NEXT: adds r2, #38 -; CHECK-NEXT: adds r0, #152 +; CHECK-NEXT: vstrh.16 q0, [r0], #152 +; CHECK-NEXT: strd r10, r11, [r3, #64] +; CHECK-NEXT: adds r4, #38 +; CHECK-NEXT: adds r3, #152 ; CHECK-NEXT: le lr, .LBB19_15 ; CHECK-NEXT: @ %bb.16: @ %for.cond.cleanup6.3 -; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.17: