Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -485,6 +485,9 @@
     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
   }
 
+  if (!Subtarget.is64Bit())
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+
   if (Subtarget.hasCmpxchg16b()) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
   }
@@ -25494,11 +25497,20 @@
 }
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
-// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
-  return needsCmpXchgNb(LI->getType()) ? AtomicExpansionKind::CmpXChg
-                                       : AtomicExpansionKind::None;
+  Type *MemType = LI->getType();
+
+  // If this is a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
+  // can use movq to do the load.
+  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+      isTypeLegal(MVT::v2i64))
+    return AtomicExpansionKind::None;
+
+  return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+                                 : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
@@ -27312,6 +27324,27 @@
     Results.push_back(EFLAGS.getValue(1));
     return;
   }
+  case ISD::ATOMIC_LOAD: {
+    if (isTypeLegal(MVT::v2i64)) {
+      auto *Node = cast<AtomicSDNode>(N);
+      // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
+      // 64-bits.
+      SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+      SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+      SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                           MVT::i64, Node->getMemOperand());
+      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                DAG.getIntPtrConstant(0, dl));
+      Results.push_back(Res);
+      Results.push_back(Ld.getValue(1));
+      return;
+    }
+    // TODO: Use MOVLPS when SSE1 is available?
+    // TODO: Use FILD/FISTP when X87 is available?
+    // Delegate to generic TypeLegalization. Situations we can really handle
+    // should have already been dealt with by AtomicExpandPass.cpp.
+    break;
+  }
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
@@ -27323,11 +27356,10 @@
   case ISD::ATOMIC_LOAD_MAX:
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
-  case ISD::ATOMIC_LOAD: {
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
-  }
+
   case ISD::BITCAST: {
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
     EVT DstVT = N->getValueType(0);
Index: llvm/test/CodeGen/X86/atomic-load-store-wide.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic-load-store-wide.ll
+++ llvm/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -36,22 +36,10 @@
 define i64 @test2(i64* %ptr) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushl %ebx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset %esi, -12
-; CHECK-NEXT:    .cfi_offset %ebx, -8
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    xorl %ebx, %ebx
-; CHECK-NEXT:    lock cmpxchg8b (%esi)
-; CHECK-NEXT:    popl %esi
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    popl %ebx
-; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    pextrd $1, %xmm0, %edx
 ; CHECK-NEXT:    retl
   %val = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %val