Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1327,6 +1327,11 @@ continue; if (const Value *V = MMO->getValue()) { + // Without AA, still treat loads within a constant global as invariant. + if (auto *GV = dyn_cast<GlobalVariable>(V)) + if (GV->isConstant() && + MMO->getSize() <= GV->getParent()->getDataLayout().getTypeStoreSize(GV->getValueType())) + continue; // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( Index: lib/CodeGen/PeepholeOptimizer.cpp =================================================================== --- lib/CodeGen/PeepholeOptimizer.cpp +++ lib/CodeGen/PeepholeOptimizer.cpp @@ -1796,7 +1796,14 @@ // instruction, so this needs to be after the folding logic. if (MI->isLoadFoldBarrier()) { LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); - FoldAsLoadDefCandidates.clear(); + SmallSet<unsigned, 16> InvariantDefs; + for (unsigned VReg : FoldAsLoadDefCandidates) { + auto *DefMI = MRI->getVRegDef(VReg); + if (!DefMI->isDereferenceableInvariantLoad(nullptr)) + continue; + InvariantDefs.insert(VReg); + } + FoldAsLoadDefCandidates = InvariantDefs; } } } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4552,6 +4552,11 @@ AtomicOrdering Order = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); +#if 0 + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); +#endif + SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -4566,6 +4571,13 @@ Flags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) Flags |= MachineMemOperand::MOInvariant; +#if 0 + if (AA && + AA->pointsToConstantMemory(MemoryLocation(I.getPointerOperand(), 
LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo))) + Flags |= MachineMemOperand::MOInvariant; +#endif if (isDereferenceablePointer(I.getPointerOperand(), DAG.getDataLayout())) Flags |= MachineMemOperand::MODereferenceable; Index: test/CodeGen/X86/atomic-unordered.ll =================================================================== --- test/CodeGen/X86/atomic-unordered.ll +++ test/CodeGen/X86/atomic-unordered.ll @@ -2516,9 +2516,9 @@ ; ; CHECK-O3-LABEL: fold_constant_clobber: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq {{.*}}(%rip), %rax +; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: movq $5, (%rdi) -; CHECK-O3-NEXT: addq %rsi, %rax +; CHECK-O3-NEXT: addq {{.*}}(%rip), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p @@ -2536,9 +2536,9 @@ ; ; CHECK-O3-LABEL: fold_constant_fence: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq {{.*}}(%rip), %rax +; CHECK-O3-NEXT: movq %rdi, %rax ; CHECK-O3-NEXT: mfence -; CHECK-O3-NEXT: addq %rdi, %rax +; CHECK-O3-NEXT: addq {{.*}}(%rip), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst @@ -2556,9 +2556,9 @@ ; ; CHECK-O3-LABEL: fold_invariant_clobber: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax +; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: movq $5, (%rdi) -; CHECK-O3-NEXT: addq %rsi, %rax +; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p @@ -2578,9 +2578,9 @@ ; ; CHECK-O3-LABEL: fold_invariant_fence: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax +; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: mfence -; CHECK-O3-NEXT: addq %rsi, %rax +; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst