Index: lib/CodeGen/MachineInstr.cpp
===================================================================
--- lib/CodeGen/MachineInstr.cpp
+++ lib/CodeGen/MachineInstr.cpp
@@ -1312,9 +1312,11 @@
   const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();

   for (MachineMemOperand *MMO : memoperands()) {
-    if (MMO->isVolatile()) return false;
-    // TODO: Figure out whether isAtomic is really necessary (see D57601).
-    if (MMO->isAtomic()) return false;
+    if (!MMO->isUnordered())
+      // If the memory operand has ordering side effects, we can't move the
+      // instruction. Such an instruction is technically an invariant load,
+      // but the caller code would need to be updated to expect that.
+      return false;
     if (MMO->isStore()) return false;
     if (MMO->isInvariant() && MMO->isDereferenceable())
       continue;
Index: lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- lib/CodeGen/PeepholeOptimizer.cpp
+++ lib/CodeGen/PeepholeOptimizer.cpp
@@ -1796,7 +1796,14 @@
       // instruction, so this needs to be after the folding logic.
       if (MI->isLoadFoldBarrier()) {
        LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
-        FoldAsLoadDefCandidates.clear();
+        SmallSet<unsigned, 16> InvariantDefs;
+        for (unsigned VReg : FoldAsLoadDefCandidates) {
+          auto *DefMI = MRI->getVRegDef(VReg);
+          if (!DefMI->isDereferenceableInvariantLoad(nullptr))
+            continue;
+          InvariantDefs.insert(VReg);
+        }
+        FoldAsLoadDefCandidates = InvariantDefs;
       }
     }
   }
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4527,6 +4527,17 @@
   auto Flags = MachineMemOperand::MOLoad;
   if (I.isVolatile())
     Flags |= MachineMemOperand::MOVolatile;
+  if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+    Flags |= MachineMemOperand::MOInvariant;
+  else if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand()))
+    // FIXME: This is working around a lack of AA in PeepholeOpt. Standardize
+    // on one design: either configure early and don't recompute, or defer
+    // until use.
+    if (GV->isConstant())
+      Flags |= MachineMemOperand::MOInvariant;
+  if (isDereferenceablePointer(I.getPointerOperand(), DAG.getDataLayout()))
+    Flags |= MachineMemOperand::MODereferenceable;
+
   Flags |= TLI.getMMOFlags(I);

   MachineMemOperand *MMO =
Index: test/CodeGen/X86/atomic-unordered.ll
===================================================================
--- test/CodeGen/X86/atomic-unordered.ll
+++ test/CodeGen/X86/atomic-unordered.ll
@@ -2218,9 +2218,9 @@
 ;
 ; CHECK-O3-LABEL: fold_constant_clobber:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    movq $5, (%rdi)
-; CHECK-O3-NEXT:    addq %rsi, %rax
+; CHECK-O3-NEXT:    addq {{.*}}(%rip), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* @Constant unordered, align 8
   store i64 5, i64* %p
@@ -2238,9 +2238,9 @@
 ;
 ; CHECK-O3-LABEL: fold_constant_fence:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O3-NEXT:    movq %rdi, %rax
 ; CHECK-O3-NEXT:    mfence
-; CHECK-O3-NEXT:    addq %rdi, %rax
+; CHECK-O3-NEXT:    addq {{.*}}(%rip), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* @Constant unordered, align 8
   fence seq_cst
@@ -2248,7 +2248,7 @@
   ret i64 %ret
 }

-define i64 @fold_invariant_clobber(i64* %p, i64 %arg) {
+define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
 ; CHECK-O0-LABEL: fold_invariant_clobber:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movq (%rdi), %rax
@@ -2258,9 +2258,9 @@
 ;
 ; CHECK-O3-LABEL: fold_invariant_clobber:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    movq $5, (%rdi)
-; CHECK-O3-NEXT:    addq %rsi, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
   store i64 5, i64* %p
@@ -2269,7 +2269,7 @@
   ret i64 %ret
 }

-define i64 @fold_invariant_fence(i64* %p, i64 %arg) {
+define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
 ; CHECK-O0-LABEL: fold_invariant_fence:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movq (%rdi), %rdi
@@ -2280,9 +2280,9 @@
 ;
 ; CHECK-O3-LABEL: fold_invariant_fence:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    mfence
-; CHECK-O3-NEXT:    addq %rsi, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
   fence seq_cst
Index: test/CodeGen/X86/hoist-invariant-load.ll
===================================================================
--- test/CodeGen/X86/hoist-invariant-load.ll
+++ test/CodeGen/X86/hoist-invariant-load.ll
@@ -73,27 +73,35 @@
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -32
-; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movl $10000, %ebp ## imm = 0x2710
-; CHECK-NEXT:    movq _objc_msgSend@{{.*}}(%rip), %r14
+; CHECK-NEXT:    movq {{.*}}(%rip), %r14
+; CHECK-NEXT:    movq _objc_msgSend@{{.*}}(%rip), %r15
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_1: ## %for.body
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    movq %rbx, %rdi
-; CHECK-NEXT:    callq *%r14
+; CHECK-NEXT:    movq %r14, %rsi
+; CHECK-NEXT:    callq *%r15
 ; CHECK-NEXT:    decl %ebp
 ; CHECK-NEXT:    jne LBB1_1
 ; CHECK-NEXT:  ## %bb.2: ## %for.end
+; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
 entry: