Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -778,6 +778,7 @@
   // Remember any instructions that read/write memory between FirstMI and MI.
   SmallVector<MachineInstr *, 4> MemInsns;
+  bool FoundLdSt = false;
 
   for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
     MachineInstr *MI = MBBI;
     // Skip DBG_VALUE instructions. Otherwise debug info can affect the
@@ -788,6 +789,15 @@
     // Now that we know this is a real instruction, count it.
     ++Count;
 
+    // If FirstMI or MI has an ordered memory reference and we have already
+    // encountered a memory operation, stop scanning to prevent reordering.
+    if (FoundLdSt && (FirstMI->hasOrderedMemoryRef() || MI->hasOrderedMemoryRef()))
+      return E;
+
+    // Track that we've seen a memory operation. This is used to ensure we
+    // don't reorder ordered memory references.
+    FoundLdSt |= MI->mayLoadOrStore();
+
     bool CanMergeOpc = Opc == MI->getOpcode();
     Flags.setSExtIdx(-1);
     if (!CanMergeOpc) {
@@ -817,10 +827,9 @@
       if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                    (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
-        // If this is a volatile load/store that otherwise matched, stop looking
-        // as something is going on that we don't have enough information to
-        // safely transform. Similarly, stop if we see a hint to avoid pairs.
-        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
+
+        // Stop if we see a hint to avoid pairs.
+        if (TII->isLdStPairSuppressed(MI))
           return E;
         // If the resultant immediate offset of merging these instructions
         // is out of range for a pairwise instruction, bail and keep looking.
@@ -1135,9 +1144,6 @@
     MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = MBBI;
   MachineBasicBlock::iterator E = MI->getParent()->end();
-  // If this is a volatile load/store, don't mess with it.
-  if (MI->hasOrderedMemoryRef())
-    return false;
 
   // Make sure this is a reg+imm (as opposed to an address reloc).
   if (!getLdStOffsetOp(MI).isImm())
Index: test/CodeGen/AArch64/arm64-ldp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-ldp.ll
+++ test/CodeGen/AArch64/arm64-ldp.ll
@@ -356,3 +356,87 @@
   ret i64 %add
 }
 
+; Combine adjacent volatile loads.
+
+; CHECK-LABEL: @volatile1
+; CHECK: ldp
+; CHECK: ret
+define i64 @volatile1(i64* %p) {
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load volatile i64, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp1 = load i64, i64* %add.ptr1, align 8
+  %add = add nsw i64 %tmp1, %tmp
+  ret i64 %add
+}
+
+; CHECK-LABEL: @volatile2
+; CHECK: ldp
+; CHECK: ret
+define i64 @volatile2(i64* %p) {
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load i64, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp1 = load volatile i64, i64* %add.ptr1, align 8
+  %add = add nsw i64 %tmp1, %tmp
+  ret i64 %add
+}
+
+; CHECK-LABEL: @volatile3
+; CHECK: ldp
+; CHECK: ret
+define i64 @volatile3(i64* %p) {
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load volatile i64, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp1 = load volatile i64, i64* %add.ptr1, align 8
+  %add = add nsw i64 %tmp1, %tmp
+  ret i64 %add
+}
+
+; This will be paired, but it is not clear whether reordering the two volatile loads is safe.
+; CHECK-LABEL: @volatile4
+; CHECK-NOT: fail
+; CHECK: ret
+define i64 @volatile4(i64* %p) {
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp1 = load volatile i64, i64* %add.ptr1, align 8
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load volatile i64, i64* %add.ptr, align 8
+  %add = add nsw i64 %tmp1, %tmp
+  ret i64 %add
+}
+
+; CHECK-LABEL: @volatile5
+; CHECK-NOT: ldp
+; CHECK: ret
+define i64 @volatile5(i64* %p) {
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load volatile i64, i64* %add.ptr, align 8
+  %add.ptr3 = getelementptr inbounds i64, i64* %p, i64 3
+  %tmp3 = load i64, i64* %add.ptr3, align 8
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp1 = load i64, i64* %add.ptr1, align 8
+  %add = add nsw i64 %tmp3, %tmp
+  %add1 = add nsw i64 %tmp1, %add
+  ret i64 %add1
+}
+
+; CHECK-LABEL: @volatile10
+; CHECK: ldr
+; CHECK: ldp
+; CHECK: ldr
+define i64 @volatile10(i64* %p) {
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 0
+  %tmp = load volatile i64, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64, i64* %p, i64 2
+  %tmp1 = load volatile i64, i64* %add.ptr1, align 8
+  %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 3
+  %tmp2 = load volatile i64, i64* %add.ptr2, align 8
+  %add.ptr3 = getelementptr inbounds i64, i64* %p, i64 1
+  %tmp3 = load volatile i64, i64* %add.ptr3, align 8
+  %add = add nsw i64 %tmp1, %tmp
+  %add2 = add nsw i64 %tmp2, %add
+  %add3 = add nsw i64 %tmp3, %add2
+  ret i64 %add3
+}