Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -99,7 +99,8 @@ // Return the matching instruction if one is found, else MBB->end(). MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, - unsigned Limit); + unsigned Limit, + bool FindNarrowMerge); // Scan the instructions looking for a store that writes to the address from // which the current load instruction reads. Return true if one is found. @@ -753,7 +754,8 @@ MergeMI->eraseFromParent(); return NextI; } - assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store"); + assert(isPromotableZeroStoreInst(I) && isPromotableZeroStoreInst(MergeMI) && + "Expected promotable zero store"); // Construct the new instruction. MachineInstrBuilder MIB; @@ -1177,7 +1179,8 @@ /// current instruction into a wider equivalent or a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - LdStPairFlags &Flags, unsigned Limit) { + LdStPairFlags &Flags, unsigned Limit, + bool FindNarrowMerge) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -1251,26 +1254,26 @@ // safely transform. Similarly, stop if we see a hint to avoid pairs. if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) return E; - // If the resultant immediate offset of merging these instructions - // is out of range for a pairwise instruction, bail and keep looking. - bool IsNarrowLoad = isNarrowLoad(MI->getOpcode()); - if (!IsNarrowLoad && - !inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - MemInsns.push_back(MI); - continue; - } - if (IsNarrowLoad || IsPromotableZeroStore) { + if (FindNarrowMerge) { // If the alignment requirements of the scaled wide load/store - // instruction can't express the offset of the scaled narrow - // input, bail and keep looking. - if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { + // instruction can't express the offset of the scaled narrow input, + // bail and keep looking. For promotable zero stores, allow only when + // the stored value is the same (i.e., WZR). + if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || + (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; } } else { + // If the resultant immediate offset of merging these instructions + // is out of range for a pairwise instruction, bail and keep looking. + if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + MemInsns.push_back(MI); + continue; + } // If the alignment requirements of the paired (scaled) instruction // can't express the offset of the unscaled input, bail and keep // looking. @@ -1283,10 +1286,7 @@ // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - // For narrow stores, allow only when the stored value is the same - // (i.e., WZR). - if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || - (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { + if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; @@ -1605,7 +1605,7 @@ // Look ahead up to LdStLimit instructions for a mergable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator MergeMI = - findMatchingInsn(MBBI, Flags, LdStLimit); + findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true); if (MergeMI != E) { if (isNarrowLoad(MI)) { ++NumNarrowLoadsPromoted; @@ -1640,7 +1640,8 @@ // Look ahead up to LdStLimit instructions for a pairable instruction. LdStPairFlags Flags; - MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, LdStLimit); + MachineBasicBlock::iterator Paired = + findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false); if (Paired != E) { ++NumPairCreated; if (TII->isUnscaledLdSt(MI)) Index: test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll =================================================================== --- test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll +++ test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll @@ -353,8 +353,8 @@ ret void } -;CHECK-LABEL: Strw_zero -;CHECK : str xzr +; CHECK-LABEL: Strw_zero +; CHECK: str xzr define void @Strw_zero(i32* nocapture %P, i32 %n) { entry: %idxprom = sext i32 %n to i64 @@ -367,8 +367,22 @@ ret void } -;CHECK-LABEL: Strw_zero_4 -;CHECK : stp xzr +; CHECK-LABEL: Strw_zero_nonzero +; CHECK: stp wzr, w1 +define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom + store i32 0, i32* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1 + store i32 %n, i32* %arrayidx2 + ret void +} + +; CHECK-LABEL: Strw_zero_4 +; CHECK: stp xzr define void @Strw_zero_4(i32* nocapture %P, i32 %n) { entry: %idxprom = sext i32 %n to i64 @@ -442,8 +456,8 @@ ret void } -;CHECK-LABEL: Sturw_zero -;CHECK : stur xzr +; CHECK-LABEL: Sturw_zero +; CHECK: stur xzr define void @Sturw_zero(i32* nocapture %P, i32 %n) { entry: %sub = add nsw i32 %n, -3 @@ -457,8 +471,8 @@ ret void } -;CHECK-LABEL: Sturw_zero_4 -;CHECK : str xzr +; CHECK-LABEL: Sturw_zero_4 +; CHECK: stp xzr, xzr define void @Sturw_zero_4(i32* nocapture %P, i32 %n) { entry: %sub = add nsw i32 %n, -3