Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -6834,12 +6834,12 @@
 ``release`` and ``acq_rel`` orderings are not valid on ``load``
 instructions. Atomic loads produce :ref:`defined <memmodel>` results
 when they may see multiple atomic stores. The type of the pointee must
-be an integer type whose bit width is a power of two greater than or
-equal to eight and less than or equal to a target-specific size limit.
-``align`` must be explicitly specified on atomic loads, and the load has
-undefined behavior if the alignment is not set to a value which is at
-least the size in bytes of the pointee. ``!nontemporal`` does not have
-any defined semantics for atomic loads.
+be an integer or floating point type whose bit width is a power of two,
+greater than or equal to eight, and less than or equal to a
+target-specific size limit. ``align`` must be explicitly specified on
+atomic loads, and the load has undefined behavior if the alignment is
+not set to a value which is at least the size in bytes of the pointee.
+``!nontemporal`` does not have any defined semantics for atomic loads.
 
 The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
@@ -6959,12 +6959,13 @@
 ``acquire`` and ``acq_rel`` orderings aren't valid on ``store``
 instructions. Atomic loads produce :ref:`defined <memmodel>` results
 when they may see multiple atomic stores. The type of the pointee must
-be an integer type whose bit width is a power of two greater than or
-equal to eight and less than or equal to a target-specific size limit.
-``align`` must be explicitly specified on atomic stores, and the store
-has undefined behavior if the alignment is not set to a value which is
-at least the size in bytes of the pointee. ``!nontemporal`` does not
-have any defined semantics for atomic stores.
+be an integer or floating point type whose bit width is a power of two,
+greater than or equal to eight, and less than or equal to a
+target-specific size limit. ``align`` must be explicitly specified
+on atomic stores, and the store has undefined behavior if the alignment
+is not set to a value which is at least the size in bytes of the
+pointee. ``!nontemporal`` does not have any defined semantics for
+atomic stores.
 
 The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
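For reference, a minimal IR sketch of the form the revised wording permits; the
function and value names here are invented for illustration and are not taken
from the patch:

  define float @example(float* %p, float %v) {
    ; a floating point pointee is now allowed, provided its bit width is a
    ; power of two and the alignment is at least the size of the pointee
    store atomic float %v, float* %p release, align 4
    %old = load atomic float, float* %p acquire, align 4
    ret float %old
  }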
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -8,8 +8,10 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or
-// AtomicCmpXchg.
+// target specific instructions which implement the same semantics in a way
+// which better fits the target backend.  This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,9 +48,12 @@
   private:
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
     bool tryExpandAtomicLoad(LoadInst *LI);
     bool expandAtomicLoadToLL(LoadInst *LI);
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
     bool expandAtomicOpToLLSC(
@@ -130,9 +135,27 @@
     }
 
     if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+
       MadeChange |= tryExpandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
     } else if (RMWI) {
       // There are two different ways of expanding RMW instructions:
       //  - into a load if it is idempotent
@@ -172,6 +195,42 @@
   return (LeadingFence || TrailingFence);
 }
 
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth.  See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
@@ -222,6 +281,35 @@
   return true;
 }
 
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth.  We used to not support floating point or vector
+/// atomics in the IR at all.  The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store.  The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+  IRBuilder<> Builder(SI);
+  auto *M = SI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+                                            M->getDataLayout());
+  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+  Value *Addr = SI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+  NewSI->setAlignment(SI->getAlignment());
+  NewSI->setVolatile(SI->isVolatile());
+  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+  SI->eraseFromParent();
+  return NewSI;
+}
+
 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
   // This function is only called on atomic stores that are too large to be
   // atomic if implemented as a native store. So we replace them by an
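As a sketch of what the two conversion helpers above produce; %p and %v are
illustrative names, and the numbered temporaries are simply whatever the
IRBuilder assigns (the AtomicExpand tests further down check exactly this
shape):

  ; input to the pass
  %v = load atomic float, float* %p unordered, align 4

  ; after convertAtomicLoadToIntegerType: same ordering, alignment and
  ; volatility, but expressed as an integer access plus bitcasts
  %1 = bitcast float* %p to i32*
  %2 = load atomic i32, i32* %1 unordered, align 4
  %v.cast = bitcast i32 %2 to float   ; replaces all uses of %v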
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -2720,7 +2720,8 @@
     Assert(LI.getAlignment() != 0,
            "Atomic load must specify explicit alignment", &LI);
     if (!ElTy->isPointerTy()) {
-      Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!",
+      Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+             "atomic load operand must have integer or floating point type!",
              &LI, ElTy);
       unsigned Size = ElTy->getPrimitiveSizeInBits();
       Assert(Size >= 8 && !(Size & (Size - 1)),
@@ -2749,8 +2750,9 @@
     Assert(SI.getAlignment() != 0,
            "Atomic store must specify explicit alignment", &SI);
     if (!ElTy->isPointerTy()) {
-      Assert(ElTy->isIntegerTy(),
-             "atomic store operand must have integer type!", &SI, ElTy);
+      Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+             "atomic store operand must have integer or floating point type!",
+             &SI, ElTy);
       unsigned Size = ElTy->getPrimitiveSizeInBits();
       Assert(Size >= 8 && !(Size & (Size - 1)),
              "atomic store operand must be power-of-two byte-sized integer",
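The relaxed predicate still rejects anything that is neither an integer nor a
floating point scalar. A small sketch of the boundary; the names %p, %q, %ok
and %bad are illustrative, the vector case follows from isFloatingPointTy()
returning false for vector types, and the x86_mmx case is the one exercised by
test/Verifier/atomics.ll below:

  ; accepted after this change
  %ok = load atomic float, float* %p unordered, align 4

  ; still rejected by the verifier: a vector is neither an integer nor a
  ; floating point scalar
  %bad = load atomic <2 x float>, <2 x float>* %q unordered, align 8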
Index: test/CodeGen/X86/atomic-non-integer.ll
===================================================================
--- test/CodeGen/X86/atomic-non-integer.ll
+++ test/CodeGen/X86/atomic-non-integer.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s
+
+; Note: This test is testing that the lowering for atomics matches what we
+; currently emit for non-atomics + the atomic restriction.  The presence of
+; particular lowering detail in these tests should not be read as requiring
+; that detail for correctness unless it's related to the atomicity itself.
+; (Specifically, there were reviewer questions about the lowering for halves
+; and their calling convention which remain unresolved.)
+
+define void @store_half(half* %fptr, half %v) {
+; CHECK-LABEL: @store_half
+; CHECK: movq %rdi, %rbx
+; CHECK: callq __gnu_f2h_ieee
+; CHECK: movw %ax, (%rbx)
+  store atomic half %v, half* %fptr unordered, align 2
+  ret void
+}
+
+define void @store_float(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float
+; CHECK: movd %xmm0, %eax
+; CHECK: movl %eax, (%rdi)
+  store atomic float %v, float* %fptr unordered, align 4
+  ret void
+}
+
+define void @store_double(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double
+; CHECK: movd %xmm0, %rax
+; CHECK: movq %rax, (%rdi)
+  store atomic double %v, double* %fptr unordered, align 8
+  ret void
+}
+
+define void @store_fp128(fp128* %fptr, fp128 %v) {
+; CHECK-LABEL: @store_fp128
+; CHECK: callq __sync_lock_test_and_set_16
+  store atomic fp128 %v, fp128* %fptr unordered, align 16
+  ret void
+}
+
+define half @load_half(half* %fptr) {
+; CHECK-LABEL: @load_half
+; CHECK: movw (%rdi), %ax
+; CHECK: movzwl %ax, %edi
+; CHECK: jmp __gnu_h2f_ieee
+  %v = load atomic half, half* %fptr unordered, align 2
+  ret half %v
+}
+
+define float @load_float(float* %fptr) {
+; CHECK-LABEL: @load_float
+; CHECK: movl (%rdi), %eax
+; CHECK: movd %eax, %xmm0
+  %v = load atomic float, float* %fptr unordered, align 4
+  ret float %v
+}
+
+define double @load_double(double* %fptr) {
+; CHECK-LABEL: @load_double
+; CHECK: movq (%rdi), %rax
+; CHECK: movd %rax, %xmm0
+  %v = load atomic double, double* %fptr unordered, align 8
+  ret double %v
+}
+
+define fp128 @load_fp128(fp128* %fptr) {
+; CHECK-LABEL: @load_fp128
+; CHECK: callq __sync_val_compare_and_swap_16
+  %v = load atomic fp128, fp128* %fptr unordered, align 16
+  ret fp128 %v
+}
+
+
+; sanity check the seq_cst lowering since that's the
+; interesting one from an ordering perspective on x86.
+
+define void @store_float_seq_cst(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float_seq_cst
+; CHECK: movd %xmm0, %eax
+; CHECK: xchgl %eax, (%rdi)
+  store atomic float %v, float* %fptr seq_cst, align 4
+  ret void
+}
+
+define void @store_double_seq_cst(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double_seq_cst
+; CHECK: movd %xmm0, %rax
+; CHECK: xchgq %rax, (%rdi)
+  store atomic double %v, double* %fptr seq_cst, align 8
+  ret void
+}
+
+define float @load_float_seq_cst(float* %fptr) {
+; CHECK-LABEL: @load_float_seq_cst
+; CHECK: movl (%rdi), %eax
+; CHECK: movd %eax, %xmm0
+  %v = load atomic float, float* %fptr seq_cst, align 4
+  ret float %v
+}
+
+define double @load_double_seq_cst(double* %fptr) {
+; CHECK-LABEL: @load_double_seq_cst
+; CHECK: movq (%rdi), %rax
+; CHECK: movd %rax, %xmm0
+  %v = load atomic double, double* %fptr seq_cst, align 8
+  ret double %v
+}
Index: test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
===================================================================
--- test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
+; functionality, please move this test to a target which still uses it.
+
+define float @float_load_expand(float* %ptr) {
+; CHECK-LABEL: @float_load_expand
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float* %ptr unordered, align 4
+  ret float %res
+}
+
+define float @float_load_expand_seq_cst(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_seq_cst
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 seq_cst, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float* %ptr seq_cst, align 4
+  ret float %res
+}
+
+define float @float_load_expand_vol(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_vol
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic volatile i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic volatile float, float* %ptr unordered, align 4
+  ret float %res
+}
+
+define float @float_load_expand_addr1(float addrspace(1)* %ptr) {
+; CHECK-LABEL: @float_load_expand_addr1
+; CHECK: %1 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: %2 = load atomic i32, i32 addrspace(1)* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float addrspace(1)* %ptr unordered, align 4
+  ret float %res
+}
+
+define void @float_store_expand(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 unordered, align 4
+  store atomic float %v, float* %ptr unordered, align 4
+  ret void
+}
+
+define void @float_store_expand_seq_cst(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_seq_cst
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 seq_cst, align 4
+  store atomic float %v, float* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @float_store_expand_vol(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_vol
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic volatile i32 %1, i32* %2 unordered, align 4
+  store atomic volatile float %v, float* %ptr unordered, align 4
+  ret void
+}
+
+define void @float_store_expand_addr1(float addrspace(1)* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_addr1
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: store atomic i32 %1, i32 addrspace(1)* %2 unordered, align 4
+  store atomic float %v, float addrspace(1)* %ptr unordered, align 4
+  ret void
+}
+
Index: test/Verifier/atomics.ll
===================================================================
--- test/Verifier/atomics.ll
+++ test/Verifier/atomics.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: atomic store operand must have integer or floating point type!
+; CHECK: atomic load operand must have integer or floating point type!
+
+define void @foo(x86_mmx* %P, x86_mmx %v) {
+  store atomic x86_mmx %v, x86_mmx* %P unordered, align 8
+  ret void
+}
+
+define x86_mmx @bar(x86_mmx* %P) {
+  %v = load atomic x86_mmx, x86_mmx* %P unordered, align 8
+  ret x86_mmx %v
+}