Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19717,13 +19717,6 @@
   }
 }
 
-static bool hasMFENCE(const X86Subtarget &Subtarget) {
-  // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
-  // no-sse2). There isn't any reason to disable it if the target processor
-  // supports it.
-  return Subtarget.hasSSE2() || Subtarget.is64Bit();
-}
-
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -19763,7 +19756,7 @@
     // the IR level, so we must wrap it in an intrinsic.
     return nullptr;
 
-  if (!hasMFENCE(Subtarget))
+  if (!Subtarget.hasMFence())
     // FIXME: it might make sense to use a locked operation here but on a
     // different cache-line to prevent cache-line bouncing. In practice it
     // is probably a small win, and x86 processors without mfence are rare
@@ -19794,7 +19787,7 @@
   // The only fence that needs an instruction is a sequentially-consistent
   // cross-thread fence.
   if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
-    if (hasMFENCE(Subtarget))
+    if (Subtarget.hasMFence())
       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 
     SDValue Chain = Op.getOperand(0);
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -845,6 +845,7 @@
 def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
 def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
 def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasMFence : Predicate<"Subtarget->hasMFence()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3770,7 +3770,7 @@
                TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
-               TB, Requires<[HasSSE2]>;
+               TB, Requires<[HasMFence]>;
 } // SchedRW
 
 def : Pat<(X86SFence), (SFENCE)>;
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -441,6 +441,11 @@
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
 
+  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+  /// no-sse2). There isn't any reason to disable it if the target processor
+  /// supports it.
+  bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
   const Triple &getTargetTriple() const { return TargetTriple; }
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
Index: test/CodeGen/X86/mfence.ll
===================================================================
--- test/CodeGen/X86/mfence.ll
+++ test/CodeGen/X86/mfence.ll
@@ -1,11 +1,37 @@
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+
+; It doesn't matter if an x86-64 target has specified "no-sse2"; we can still use mfence.
 
 define void @test() {
-; CHECK-LABEL: test:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    mfence
-; CHECK-NEXT:    retl
+; X32-LABEL: test:
+; X32:       # BB#0:
+; X32-NEXT:    mfence
+; X32-NEXT:    retl
+;
+; X64-LABEL: test:
+; X64:       # BB#0:
+; X64-NEXT:    mfence
+; X64-NEXT:    retq
   fence seq_cst
   ret void
 }
+
+define i32 @fence(i32* %ptr) {
+; X32-LABEL: fence:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    mfence
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: fence:
+; X64:       # BB#0:
+; X64-NEXT:    mfence
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+  %atomic = atomicrmw add i32* %ptr, i32 0 seq_cst
+  ret i32 %atomic
+}
+
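
A sketch of the one configuration the new RUN lines leave uncovered: a 32-bit target without SSE2, where hasMFence() returns false and the seq_cst fence must be lowered without mfence (the fallback path in LowerATOMIC_FENCE that follows the hasMFence() check above). The X32-NOSSE prefix is hypothetical, and the expected assembly is an assumption based on the backend's historical fallback of a locked "orl $0" against the stack (OR32mi8Locked); the CHECK lines would need regenerating with utils/update_llc_test_checks.py before landing:

; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X32-NOSSE
;
; X32-NOSSE-LABEL: test:
; X32-NOSSE:       # BB#0:
; X32-NOSSE-NEXT:    lock orl $0, (%esp)
; X32-NOSSE-NEXT:    retl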