Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -1546,6 +1546,12 @@
     return AtomicExpansionKind::None;
   }
 
+  /// Returns true if the target supports lock-free atomic operations of the
+  /// given width and alignment (both in bytes).
+  virtual bool hasBuiltinAtomic(uint64_t Size, uint64_t Align) const {
+    return Align >= Size && Size <= getMaxAtomicSizeInBitsSupported() / 8;
+  }
+
   /// On some platforms, an AtomicRMW that never actually modifies the value
   /// (such as fetch_add of 0) can be turned into a fence followed by an
   /// atomic load. This may sound useless, but it makes it possible for the
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -163,7 +163,7 @@
 bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
   unsigned Size = getAtomicOpSize(I);
   unsigned Align = getAtomicOpAlign(I);
-  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+  return TLI->hasBuiltinAtomic(Size, Align);
 }
 
 } // end anonymous namespace
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4069,8 +4069,8 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
-    report_fatal_error("Cannot generate unaligned atomic load");
+  if (!TLI.hasBuiltinAtomic(VT.getSizeInBits() / 8, I.getAlignment()))
+    report_fatal_error("Cannot generate atomic load for this instruction");
 
   MachineMemOperand *MMO =
       DAG.getMachineFunction().
@@ -4105,8 +4105,8 @@
   EVT VT = TLI.getValueType(DAG.getDataLayout(),
                             I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
-    report_fatal_error("Cannot generate unaligned atomic store");
+  if (!TLI.hasBuiltinAtomic(VT.getSizeInBits() / 8, I.getAlignment()))
+    report_fatal_error("Cannot generate atomic store for this instruction");
 
   SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1263,6 +1263,7 @@
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
+    bool hasBuiltinAtomic(uint64_t Size, uint64_t Align) const override;
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -472,6 +472,7 @@
   if (Subtarget.hasCmpxchg16b()) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
   }
+  setMaxAtomicSizeInBitsSupported(128);
 
   // FIXME - use subtarget debug flags
   if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
@@ -22837,6 +22838,10 @@
   }
 }
 
+bool X86TargetLowering::hasBuiltinAtomic(uint64_t Size, uint64_t Align) const {
+  return Size <= getMaxAtomicSizeInBitsSupported() / 8;
+}
+
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Index: test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
===================================================================
--- test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
+++ test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu -mattr=cx16 | FileCheck %s --check-prefix=CHECK-CX16
+; x86-64 supports unaligned atomic load/store for primitive integer types.
+; Check that unaligned atomic loads/stores are properly expanded.
+
+define i16 @test_unaligned_load_i16(i16* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i16
+; CHECK: %t0 = load atomic i16, i16* %ptr monotonic, align 1
+; CHECK: ret i16 %t0
+  %t0 = load atomic i16, i16* %ptr monotonic, align 1
+  ret i16 %t0
+}
+
+define i32 @test_unaligned_load_i32(i32* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i32
+; CHECK: %t0 = load atomic i32, i32* %ptr monotonic, align 2
+; CHECK: ret i32 %t0
+  %t0 = load atomic i32, i32* %ptr monotonic, align 2
+  ret i32 %t0
+}
+
+define i64 @test_unaligned_load_i64(i64* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i64
+; CHECK: %t0 = load atomic i64, i64* %ptr monotonic, align 4
+; CHECK: ret i64 %t0
+  %t0 = load atomic i64, i64* %ptr monotonic, align 4
+  ret i64 %t0
+}
+
+define i128 @test_unaligned_load_i128(i128* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i128
+; CHECK: %t0 = load atomic i128, i128* %ptr monotonic, align 8
+; CHECK: ret i128 %t0
+; CHECK-CX16: %[[XCHG:.*]] = cmpxchg i128* %ptr, i128 0, i128 0 monotonic monotonic
+; CHECK-CX16: %[[EXTR:.*]] = extractvalue { i128, i1 } %[[XCHG]], 0
+; CHECK-CX16: ret i128 %[[EXTR]]
+  %t0 = load atomic i128, i128* %ptr monotonic, align 8
+  ret i128 %t0
+}
+
+define void @test_unaligned_store_i16(i16 %val, i16* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i16
+; CHECK: store atomic i16 %val, i16* %ptr monotonic, align 1
+; CHECK: ret void
+  store atomic i16 %val, i16* %ptr monotonic, align 1
+  ret void
+}
+
+define void @test_unaligned_store_i32(i32 %val, i32* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i32
+; CHECK: store atomic i32 %val, i32* %ptr monotonic, align 2
+; CHECK: ret void
+  store atomic i32 %val, i32* %ptr monotonic, align 2
+  ret void
+}
+
+define void @test_unaligned_store_i64(i64 %val, i64* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i64
+; CHECK: store atomic i64 %val, i64* %ptr monotonic, align 4
+; CHECK: ret void
+  store atomic i64 %val, i64* %ptr monotonic, align 4
+  ret void
+}
+
+define void @test_unaligned_store_i128(i128 %val, i128* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i128
+; CHECK: store atomic i128 %val, i128* %ptr monotonic, align 4
+; CHECK: ret void
+; CHECK-CX16: %[[LOAD:.*]] = load i128, i128* %ptr, align 16
+; CHECK-CX16: br label %atomicrmw.start
+; CHECK-CX16: atomicrmw.start:
+; CHECK-CX16: %loaded = phi i128 [ %[[LOAD]], %0 ], [ %newloaded, %atomicrmw.start ]
+; CHECK-CX16: %[[XCHG:.*]] = cmpxchg i128* %ptr, i128 %loaded, i128 %val monotonic monotonic
+; CHECK-CX16: %[[SUCC:.*]] = extractvalue { i128, i1 } %[[XCHG]], 1
+; CHECK-CX16: %newloaded = extractvalue { i128, i1 } %[[XCHG]], 0
+; CHECK-CX16: br i1 %[[SUCC]], label %atomicrmw.end, label %atomicrmw.start
+  store atomic i128 %val, i128* %ptr monotonic, align 4
+  ret void
+}
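
For reference (not part of the patch): a minimal standalone C++ sketch of the semantics introduced above, assuming the check logic is lifted out of the LLVM classes into hypothetical free functions. It contrasts the generic default of TargetLowering::hasBuiltinAtomic (natural alignment plus supported width) with the X86 override from the patch (width only, since unaligned or oversized accesses are handled by expansion).

    // Illustrative sketch only; hypothetical free functions mirroring the
    // patch, not the real LLVM API.
    #include <cstdint>
    #include <cstdio>

    // Default rule: the access must be naturally aligned and no wider than
    // the maximum atomic width the target reports (sizes in bytes).
    static bool defaultHasBuiltinAtomic(uint64_t Size, uint64_t Align,
                                        uint64_t MaxAtomicSizeInBits) {
      return Align >= Size && Size <= MaxAtomicSizeInBits / 8;
    }

    // X86-style rule from the patch: only the width is checked, so unaligned
    // accesses of supported sizes are still treated as builtin.
    static bool x86HasBuiltinAtomic(uint64_t Size, uint64_t /*Align*/,
                                    uint64_t MaxAtomicSizeInBits) {
      return Size <= MaxAtomicSizeInBits / 8;
    }

    int main() {
      const uint64_t MaxBits = 128; // mirrors setMaxAtomicSizeInBitsSupported(128)
      // A 4-byte access aligned to 2: rejected by the default rule,
      // accepted by the X86 override (as in test_unaligned_load_i32).
      std::printf("default: %d, x86: %d\n",
                  defaultHasBuiltinAtomic(4, 2, MaxBits),
                  x86HasBuiltinAtomic(4, 2, MaxBits));
      return 0;
    }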