diff --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst
--- a/llvm/docs/Atomics.rst
+++ b/llvm/docs/Atomics.rst
@@ -453,10 +453,10 @@
 atomic constructs. Here are some lowerings it can do:
 
 * cmpxchg -> loop with load-linked/store-conditional
-  by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``,
-  ``emitStoreConditional()``
-* large loads/stores -> ll-sc/cmpxchg
-  by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()``
+  by overriding ``shouldExpandAtomicInstInIR()`` for ``AtomicCmpXchgInst``,
+  ``emitLoadLinked()``, and ``emitStoreConditional()``
+* large loads/stores -> ll-sc/cmpxchg by overriding
+  ``shouldExpandAtomicInstInIR()`` for ``LoadInst``/``StoreInst``
 * strong atomic accesses -> monotonic accesses + fences by overriding
   ``shouldInsertFencesForAtomic()``, ``emitLeadingFence()``, and
   ``emitTrailingFence()``
@@ -464,8 +464,8 @@
   by overriding ``expandAtomicRMWInIR()``
 * expansion to __atomic_* libcalls for unsupported sizes.
 * part-word atomicrmw/cmpxchg -> target-specific intrinsic by overriding
-  ``shouldExpandAtomicRMWInIR``, ``emitMaskedAtomicRMWIntrinsic``,
-  ``shouldExpandAtomicCmpXchgInIR``, and ``emitMaskedAtomicCmpXchgIntrinsic``.
+  ``shouldExpandAtomicInstInIR`` for ``AtomicRMWInst``/``AtomicCmpXchgInst``,
+  ``emitMaskedAtomicRMWIntrinsic``, and ``emitMaskedAtomicCmpXchgIntrinsic``.
 
 For an example of these look at the ARM (first five lowerings) or RISC-V (last
 lowering) backend.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2041,9 +2041,14 @@
     return true;
   }
 
-  /// Returns how the given (atomic) load should be expanded by the
-  /// IR-level AtomicExpand pass.
-  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  /// Returns how the given (atomic) instruction should be expanded by the
+  /// IR-level AtomicExpand pass. This is a combination of the previous four
+  /// per-instruction API functions.
+  virtual AtomicExpansionKind shouldExpandAtomicInstInIR(Instruction *I) const {
+    if (auto *RMW = dyn_cast<AtomicRMWInst>(I))
+      return RMW->isFloatingPointOperation() ? AtomicExpansionKind::CmpXChg
+                                             : AtomicExpansionKind::None;
+
     return AtomicExpansionKind::None;
   }
 
@@ -2055,13 +2060,6 @@
     return AtomicExpansionKind::None;
   }
 
-  /// Returns how the given (atomic) store should be expanded by the IR-level
-  /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
-  /// to use an atomicrmw xchg.
-  virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
-    return AtomicExpansionKind::None;
-  }
-
   /// Returns how the given (atomic) store should be cast by the IR-level
   /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger
   /// will try to cast the operands to integer values.
@@ -2071,20 +2069,6 @@
     return AtomicExpansionKind::None;
   }
 
-  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
-  /// AtomicExpand pass.
-  virtual AtomicExpansionKind
-  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
-    return AtomicExpansionKind::None;
-  }
-
-  /// Returns how the IR-level AtomicExpand pass should expand the given
-  /// AtomicRMW, if at all. Default is to never expand.
-  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
-    return RMW->isFloatingPointOperation() ?
- AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; - } - /// Returns how the given atomic atomicrmw should be cast by the IR-level /// AtomicExpand pass. virtual AtomicExpansionKind @@ -2105,7 +2089,7 @@ /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf /// This method tries doing that transformation, returning the atomic load if /// it succeeds, and nullptr otherwise. - /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo + /// If shouldExpandAtomicInstInIR returns true on that load, it will undergo /// another round of expansion. virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -234,7 +234,7 @@ FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); } else if (CASI && - TLI->shouldExpandAtomicCmpXchgInIR(CASI) == + TLI->shouldExpandAtomicInstInIR(CASI) == TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()) || @@ -402,7 +402,7 @@ } bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { - switch (TLI->shouldExpandAtomicLoadInIR(LI)) { + switch (TLI->shouldExpandAtomicInstInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: @@ -424,7 +424,7 @@ } bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { - switch (TLI->shouldExpandAtomicStoreInIR(SI)) { + switch (TLI->shouldExpandAtomicInstInIR(SI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::Expand: @@ -509,7 +509,7 @@ // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. // It is the responsibility of the target to only signal expansion via - // shouldExpandAtomicRMW in cases where this is required and possible. + // shouldExpandAtomicInstInIR in cases where this is required and possible. 
IRBuilder<> Builder(SI); AtomicRMWInst *AI = Builder.CreateAtomicRMW( AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), @@ -548,7 +548,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { LLVMContext &Ctx = AI->getModule()->getContext(); - TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); + TargetLowering::AtomicExpansionKind Kind = + TLI->shouldExpandAtomicInstInIR(AI); switch (Kind) { case TargetLoweringBase::AtomicExpansionKind::None: return false; @@ -1493,7 +1494,7 @@ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(CI); - switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { + switch (TLI->shouldExpandAtomicInstInIR(CI)) { default: llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); case TargetLoweringBase::AtomicExpansionKind::None: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -676,14 +676,7 @@ bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; bool useLoadStackGuardNode() const override; TargetLoweringBase::LegalizeTypeAction diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19697,100 +19697,102 @@ return isOpSuitableForLDPSTP(I); } -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. TargetLoweringBase::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - if (Size != 128 || isOpSuitableForLDPSTP(SI)) - return AtomicExpansionKind::None; - return AtomicExpansionKind::Expand; -} - -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - - if (Size != 128 || isOpSuitableForLDPSTP(LI)) - return AtomicExpansionKind::None; - - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. 
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - - return AtomicExpansionKind::LLSC; -} - -// For the real atomic operations, we have ldxr/stxr up to 128 bits, -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; - - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size > 128) return AtomicExpansionKind::None; - - // Nand is not supported in LSE. - // Leave 128 bits to LLSC or CmpXChg. - if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { - if (Subtarget->hasLSE()) +AArch64TargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Loads and stores less than 128-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + if (Size != 128 || isOpSuitableForLDPSTP(SI)) return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics()) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. - // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { - return AtomicExpansionKind::None; - } - } + return AtomicExpansionKind::Expand; } - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; + if (auto *LI = dyn_cast(I)) { + // Loads and stores less than 128-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return AtomicExpansionKind::LLSC; -} + if (Size != 128 || isOpSuitableForLDPSTP(LI)) + return AtomicExpansionKind::None; -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *AI) const { - // If subtarget has LSE, leave cmpxchg intact for codegen. - if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) - return AtomicExpansionKind::None; - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement cmpxchg without spilling. If the address being exchanged is also - // on the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::None; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. 
If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; - // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand - // it. - unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); - if (Size > 64) - return AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; + } - return AtomicExpansionKind::LLSC; + if (auto *AI = dyn_cast(I)) { + // For the real atomic operations, we have ldxr/stxr up to 128 bits, + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size > 128) + return AtomicExpansionKind::None; + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) + return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } + } + } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + + return AtomicExpansionKind::LLSC; + } + + if (auto *AI = dyn_cast(I)) { + // If subtarget has LSE, leave cmpxchg intact for codegen. + if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) + return AtomicExpansionKind::None; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement cmpxchg without spilling. If the address being exchanged is + // also on the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::None; + + // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand + // it. 
+ unsigned Size = + AI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size > 64) + return AtomicExpansionKind::None; + + return AtomicExpansionKind::LLSC; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -334,7 +334,8 @@ return MVT::i32; } - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1, LLT Ty2) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4793,16 +4793,20 @@ } } -TargetLowering::AtomicExpansionKind -AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { - switch (RMW->getOperation()) { - case AtomicRMWInst::Nand: - case AtomicRMWInst::FAdd: - case AtomicRMWInst::FSub: - return AtomicExpansionKind::CmpXChg; - default: - return AtomicExpansionKind::None; +TargetLoweringBase::AtomicExpansionKind +AMDGPUTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *RMW = dyn_cast(I)) { + switch (RMW->getOperation()) { + case AtomicRMWInst::Nand: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + return AtomicExpansionKind::CmpXChg; + default: + return AtomicExpansionKind::None; + } } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal( diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -473,11 +473,9 @@ const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const override; - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12545,120 +12545,120 @@ return DenormMode == DenormalMode::getIEEE(); } -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { - unsigned AS = RMW->getPointerAddressSpace(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS) - return AtomicExpansionKind::NotAtomic; +TargetLoweringBase::AtomicExpansionKind +SITargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) + return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? 
AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; - auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) { - OptimizationRemarkEmitter ORE(RMW->getFunction()); - LLVMContext &Ctx = RMW->getFunction()->getContext(); - SmallVector SSNs; - Ctx.getSyncScopeNames(SSNs); - auto MemScope = SSNs[RMW->getSyncScopeID()].empty() - ? "system" - : SSNs[RMW->getSyncScopeID()]; - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) - << "Hardware instruction generated for atomic " - << RMW->getOperationName(RMW->getOperation()) - << " operation at memory scope " << MemScope - << " due to an unsafe request."; - }); - return Kind; - }; + if (auto *LI = dyn_cast(I)) + return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; - switch (RMW->getOperation()) { - case AtomicRMWInst::FAdd: { - Type *Ty = RMW->getType(); + if (auto *RMW = dyn_cast(I)) { + unsigned AS = RMW->getPointerAddressSpace(); + if (AS == AMDGPUAS::PRIVATE_ADDRESS) + return AtomicExpansionKind::NotAtomic; - // We don't have a way to support 16-bit atomics now, so just leave them - // as-is. - if (Ty->isHalfTy()) - return AtomicExpansionKind::None; + auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) { + OptimizationRemarkEmitter ORE(RMW->getFunction()); + LLVMContext &Ctx = RMW->getFunction()->getContext(); + SmallVector SSNs; + Ctx.getSyncScopeNames(SSNs); + auto MemScope = SSNs[RMW->getSyncScopeID()].empty() + ? "system" + : SSNs[RMW->getSyncScopeID()]; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) + << "Hardware instruction generated for atomic " + << RMW->getOperationName(RMW->getOperation()) + << " operation at memory scope " << MemScope + << " due to an unsafe request."; + }); + return Kind; + }; - if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy())) - return AtomicExpansionKind::CmpXChg; + switch (RMW->getOperation()) { + case AtomicRMWInst::FAdd: { + Type *Ty = RMW->getType(); - if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) && - Subtarget->hasAtomicFaddInsts()) { - if (Subtarget->hasGFX940Insts()) + // We don't have a way to support 16-bit atomics now, so just leave them + // as-is. + if (Ty->isHalfTy()) return AtomicExpansionKind::None; - // The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe - // floating point atomic instructions. May generate more efficient code, - // but may not respect rounding and denormal modes, and may give incorrect - // results for certain memory destinations. - if (RMW->getFunction() - ->getFnAttribute("amdgpu-unsafe-fp-atomics") - .getValueAsString() != "true") + if (!Ty->isFloatTy() && + (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy())) return AtomicExpansionKind::CmpXChg; - if (Subtarget->hasGFX90AInsts()) { - if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS) + if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) && + Subtarget->hasAtomicFaddInsts()) { + if (Subtarget->hasGFX940Insts()) + return AtomicExpansionKind::None; + + // The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe + // floating point atomic instructions. May generate more efficient code, + // but may not respect rounding and denormal modes, and may give + // incorrect results for certain memory destinations. 
+ if (RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsString() != "true") return AtomicExpansionKind::CmpXChg; - auto SSID = RMW->getSyncScopeID(); - if (SSID == SyncScope::System || - SSID == RMW->getContext().getOrInsertSyncScopeID("one-as")) + if (Subtarget->hasGFX90AInsts()) { + if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS) + return AtomicExpansionKind::CmpXChg; + + auto SSID = RMW->getSyncScopeID(); + if (SSID == SyncScope::System || + SSID == RMW->getContext().getOrInsertSyncScopeID("one-as")) + return AtomicExpansionKind::CmpXChg; + + return ReportUnsafeHWInst(AtomicExpansionKind::None); + } + + if (AS == AMDGPUAS::FLAT_ADDRESS) return AtomicExpansionKind::CmpXChg; - return ReportUnsafeHWInst(AtomicExpansionKind::None); + return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None) + : AtomicExpansionKind::CmpXChg; } - if (AS == AMDGPUAS::FLAT_ADDRESS) - return AtomicExpansionKind::CmpXChg; + // DS FP atomics do respect the denormal mode, but the rounding mode is + // fixed to round-to-nearest-even. + // The only exception is DS_ADD_F64 which never flushes regardless of + // mode. + if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) { + if (!Ty->isDoubleTy()) + return AtomicExpansionKind::None; - return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None) - : AtomicExpansionKind::CmpXChg; + if (fpModeMatchesGlobalFPAtomicMode(RMW)) + return AtomicExpansionKind::None; + + return RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsString() == "true" + ? ReportUnsafeHWInst(AtomicExpansionKind::None) + : AtomicExpansionKind::CmpXChg; + } + + return AtomicExpansionKind::CmpXChg; + } + default: + break; } - // DS FP atomics do respect the denormal mode, but the rounding mode is - // fixed to round-to-nearest-even. - // The only exception is DS_ADD_F64 which never flushes regardless of mode. - if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) { - if (!Ty->isDoubleTy()) - return AtomicExpansionKind::None; - - if (fpModeMatchesGlobalFPAtomicMode(RMW)) - return AtomicExpansionKind::None; - - return RMW->getFunction() - ->getFnAttribute("amdgpu-unsafe-fp-atomics") - .getValueAsString() == "true" - ? ReportUnsafeHWInst(AtomicExpansionKind::None) - : AtomicExpansionKind::CmpXChg; - } - - return AtomicExpansionKind::CmpXChg; - } - default: - break; + return AMDGPUTargetLowering::shouldExpandAtomicInstInIR(RMW); } - return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); -} + if (auto *CmpX = dyn_cast(I)) + return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; -} - -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; -} - -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const { - return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? 
AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } const TargetRegisterClass * diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -663,14 +663,9 @@ unsigned Factor) const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override; + TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; bool useLoadStackGuardNode() const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20943,96 +20943,97 @@ llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } -// Loads and stores less than 64-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit -// anything for those. TargetLoweringBase::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - bool has64BitAtomicStore; - if (Subtarget->isMClass()) - has64BitAtomicStore = false; - else if (Subtarget->isThumb()) - has64BitAtomicStore = Subtarget->hasV7Ops(); - else - has64BitAtomicStore = Subtarget->hasV6Ops(); +ARMTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't + // emit anything for those. + bool has64BitAtomicStore; + if (Subtarget->isMClass()) + has64BitAtomicStore = false; + else if (Subtarget->isThumb()) + has64BitAtomicStore = Subtarget->hasV7Ops(); + else + has64BitAtomicStore = Subtarget->hasV6Ops(); - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} - -// Loads and stores less than 64-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit -// anything for those. -// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that -// guarantee, see DDI0406C ARM architecture reference manual, -// sections A8.8.72-74 LDRD) -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - bool has64BitAtomicLoad; - if (Subtarget->isMClass()) - has64BitAtomicLoad = false; - else if (Subtarget->isThumb()) - has64BitAtomicLoad = Subtarget->hasV7Ops(); - else - has64BitAtomicLoad = Subtarget->hasV6Ops(); - - unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return (Size == 64 && has64BitAtomicLoad) ? 
AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; -} - -// For the real atomic operations, we have ldrex/strex up to 32 bits, -// and up to 64 bits on the non-M profiles -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; - - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - bool hasAtomicRMW; - if (Subtarget->isMClass()) - hasAtomicRMW = Subtarget->hasV8MBaselineOps(); - else if (Subtarget->isThumb()) - hasAtomicRMW = Subtarget->hasV7Ops(); - else - hasAtomicRMW = Subtarget->hasV6Ops(); - if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on - // the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - return AtomicExpansionKind::LLSC; + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } - return AtomicExpansionKind::None; -} -// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 -// bits, and up to 64 bits on the non-M profiles. -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement cmpxchg without spilling. If the address being exchanged is also - // on the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); - bool HasAtomicCmpXchg; - if (Subtarget->isMClass()) - HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); - else if (Subtarget->isThumb()) - HasAtomicCmpXchg = Subtarget->hasV7Ops(); - else - HasAtomicCmpXchg = Subtarget->hasV6Ops(); - if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && - Size <= (Subtarget->isMClass() ? 32U : 64U)) - return AtomicExpansionKind::LLSC; - return AtomicExpansionKind::None; + if (auto *LI = dyn_cast(I)) { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't + // emit anything for those. + // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that + // guarantee, see DDI0406C ARM architecture reference manual, + // sections A8.8.72-74 LDRD) + bool has64BitAtomicLoad; + if (Subtarget->isMClass()) + has64BitAtomicLoad = false; + else if (Subtarget->isThumb()) + has64BitAtomicLoad = Subtarget->hasV7Ops(); + else + has64BitAtomicLoad = Subtarget->hasV6Ops(); + + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64 && has64BitAtomicLoad) ? 
AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + bool hasAtomicRMW; + if (Subtarget->isMClass()) + hasAtomicRMW = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + hasAtomicRMW = Subtarget->hasV7Ops(); + else + hasAtomicRMW = Subtarget->hasV6Ops(); + if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic + // operation can never succeed. So at -O0 lower this operation to a CAS + // loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; + } + return AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + // ldrex/strex can be used up to 32 bits, and up to 64 bits on the non-M + // profiles. At -O0, fast-regalloc cannot cope with the live vregs necessary + // to implement cmpxchg without spilling. If the address being exchanged is + // also on the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); + bool HasAtomicCmpXchg; + if (Subtarget->isMClass()) + HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + HasAtomicCmpXchg = Subtarget->hasV7Ops(); + else + HasAtomicCmpXchg = Subtarget->hasV6Ops(); + if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && + Size <= (Subtarget->isMClass() ? 
32U : 64U)) + return AtomicExpansionKind::LLSC; + return AtomicExpansionKind::None; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool ARMTargetLowering::shouldInsertFencesForAtomic( diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -327,15 +327,8 @@ AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { - return AtomicExpansionKind::LLSC; - } + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; private: void initializeHVXLowering(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3658,24 +3658,27 @@ return Ext; } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64 - ? AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; -} +TargetLoweringBase::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Do not expand loads and stores that don't exceed 64 bits. + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; + } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - // Do not expand loads and stores that don't exceed 64 bits. - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64 - ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} + if (auto *LI = dyn_cast(I)) { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? 
AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; + } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *AI) const { - return AtomicExpansionKind::LLSC; + if (auto *AI = dyn_cast(I)) + return AtomicExpansionKind::LLSC; + + if (auto *AI = dyn_cast(I)) + return AtomicExpansionKind::LLSC; + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -569,8 +569,8 @@ return AtomicExpansionKind::None; } - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; private: const NVPTXSubtarget &STI; // cache the subtarget here diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -5127,65 +5127,69 @@ } } -NVPTXTargetLowering::AtomicExpansionKind -NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - Type *Ty = AI->getValOperand()->getType(); +TargetLoweringBase::AtomicExpansionKind +NVPTXTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + Type *Ty = AI->getValOperand()->getType(); - if (AI->isFloatingPointOperation()) { - if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { - if (Ty->isFloatTy()) - return AtomicExpansionKind::None; - if (Ty->isDoubleTy() && STI.hasAtomAddF64()) - return AtomicExpansionKind::None; + if (AI->isFloatingPointOperation()) { + if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { + if (Ty->isFloatTy()) + return AtomicExpansionKind::None; + if (Ty->isDoubleTy() && STI.hasAtomAddF64()) + return AtomicExpansionKind::None; + } + return AtomicExpansionKind::CmpXChg; } + + assert(Ty->isIntegerTy() && "Ty should be integer at this point"); + auto ITy = cast(Ty); + + switch (AI->getOperation()) { + default: + return AtomicExpansionKind::CmpXChg; + case AtomicRMWInst::BinOp::And: + case AtomicRMWInst::BinOp::Or: + case AtomicRMWInst::BinOp::Xor: + case AtomicRMWInst::BinOp::Xchg: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomBitwise64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + case AtomicRMWInst::BinOp::Add: + case AtomicRMWInst::BinOp::Sub: + case AtomicRMWInst::BinOp::Max: + case AtomicRMWInst::BinOp::Min: + case AtomicRMWInst::BinOp::UMax: + case AtomicRMWInst::BinOp::UMin: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomMinMax64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + } + return AtomicExpansionKind::CmpXChg; } - assert(Ty->isIntegerTy() && "Ty should be integer at this point"); - auto ITy = cast(Ty); - - switch (AI->getOperation()) { - default: - return AtomicExpansionKind::CmpXChg; - case AtomicRMWInst::BinOp::And: - case AtomicRMWInst::BinOp::Or: - case AtomicRMWInst::BinOp::Xor: - case 
AtomicRMWInst::BinOp::Xchg: - switch (ITy->getBitWidth()) { - case 8: - case 16: - return AtomicExpansionKind::CmpXChg; - case 32: - return AtomicExpansionKind::None; - case 64: - if (STI.hasAtomBitwise64()) - return AtomicExpansionKind::None; - return AtomicExpansionKind::CmpXChg; - default: - llvm_unreachable("unsupported width encountered"); - } - case AtomicRMWInst::BinOp::Add: - case AtomicRMWInst::BinOp::Sub: - case AtomicRMWInst::BinOp::Max: - case AtomicRMWInst::BinOp::Min: - case AtomicRMWInst::BinOp::UMax: - case AtomicRMWInst::BinOp::UMin: - switch (ITy->getBitWidth()) { - case 8: - case 16: - return AtomicExpansionKind::CmpXChg; - case 32: - return AtomicExpansionKind::None; - case 64: - if (STI.hasAtomMinMax64()) - return AtomicExpansionKind::None; - return AtomicExpansionKind::CmpXChg; - default: - llvm_unreachable("unsupported width encountered"); - } - } - - return AtomicExpansionKind::CmpXChg; + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } // Pin NVPTXTargetObjectFile's vtables to this file. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -912,11 +912,8 @@ bool shouldInlineQuadwordAtomics() const; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - - TargetLowering::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6238,7 +6238,7 @@ ArgOffset += PtrByteSize; continue; } - // Copy the object to parameter save area if it can not be entirely passed + // Copy the object to parameter save area if it can not be entirely passed // by registers. // FIXME: we only need to copy the parts which need to be passed in // parameter save area. 
For the parts passed by registers, we don't need @@ -6871,7 +6871,7 @@ // // Low Memory +--------------------------------------------+ // SP +---> | Back chain | ---+ -// | +--------------------------------------------+ | +// | +--------------------------------------------+ | // | | Saved Condition Register | | // | +--------------------------------------------+ | // | | Saved Linkage Register | | @@ -7836,7 +7836,7 @@ return SDValue(); SDValue N1 = Op.getOperand(0); - EVT SrcVT = N1.getValueType(); + EVT SrcVT = N1.getValueType(); unsigned SrcSize = SrcVT.getSizeInBits(); if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) || @@ -18080,20 +18080,23 @@ Subtarget.hasQuadwordAtomics(); } -TargetLowering::AtomicExpansionKind -PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (shouldInlineQuadwordAtomics() && Size == 128) - return AtomicExpansionKind::MaskedIntrinsic; - return TargetLowering::shouldExpandAtomicRMWInIR(AI); -} +TargetLoweringBase::AtomicExpansionKind +PPCTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (shouldInlineQuadwordAtomics() && Size == 128) + return AtomicExpansionKind::MaskedIntrinsic; + return TargetLowering::shouldExpandAtomicInstInIR(AI); + } -TargetLowering::AtomicExpansionKind -PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { - unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); - if (shouldInlineQuadwordAtomics() && Size == 128) - return AtomicExpansionKind::MaskedIntrinsic; - return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI); + if (auto *AI = dyn_cast(I)) { + unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); + if (shouldInlineQuadwordAtomics() && Size == 128) + return AtomicExpansionKind::MaskedIntrinsic; + return TargetLowering::shouldExpandAtomicInstInIR(AI); + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static Intrinsic::ID diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -509,14 +509,12 @@ bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11517,18 +11517,30 @@ return nullptr; } -TargetLowering::AtomicExpansionKind -RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating - // point operations can't be used in an lr/sc sequence without breaking the - // 
forward-progress guarantee. - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; +TargetLoweringBase::AtomicExpansionKind +RISCVTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as + // floating point operations can't be used in an lr/sc sequence without + // breaking the forward-progress guarantee. + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size == 8 || Size == 16) - return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; + } + + if (auto *CI = dyn_cast(I)) { + unsigned Size = + CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static Intrinsic::ID @@ -11625,15 +11637,6 @@ return Result; } -TargetLowering::AtomicExpansionKind -RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *CI) const { - unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); - if (Size == 8 || Size == 16) - return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; -} - Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -201,7 +201,8 @@ return true; } - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl& Results, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1397,12 +1397,17 @@ // TargetLowering Implementation //===----------------------------------------------------------------------===// -TargetLowering::AtomicExpansionKind SparcTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->getOperation() == AtomicRMWInst::Xchg && - AI->getType()->getPrimitiveSizeInBits() == 32) - return AtomicExpansionKind::None; // Uses xchg instruction +TargetLoweringBase::AtomicExpansionKind +SparcTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + if (AI->getOperation() == AtomicRMWInst::Xchg && + AI->getType()->getPrimitiveSizeInBits() == 32) + return AtomicExpansionKind::None; // Uses xchg instruction - return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::CmpXChg; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } /// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -112,7 
+112,7 @@ Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; ISD::NodeType getExtendForAtomicOps() const override { return ISD::ANY_EXTEND; } diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1120,17 +1120,21 @@ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -TargetLowering::AtomicExpansionKind -VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // We have TS1AM implementation for i8/i16/i32/i64, so use it. - if (AI->getOperation() == AtomicRMWInst::Xchg) { - return AtomicExpansionKind::None; - } - // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. +TargetLoweringBase::AtomicExpansionKind +VETargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // We have TS1AM implementation for i8/i16/i32/i64, so use it. + if (AI->getOperation() == AtomicRMWInst::Xchg) { + return AtomicExpansionKind::None; + } + // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. - // Otherwise, expand it using compare and exchange instruction to not call - // __sync_fetch_and_* functions. - return AtomicExpansionKind::CmpXChg; + // Otherwise, expand it using compare and exchange instruction to not call + // __sync_fetch_and_* functions. + return AtomicExpansionKind::CmpXChg; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -53,7 +53,8 @@ /// right decision when generating code for different targets. 
const WebAssemblySubtarget *Subtarget; - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -362,21 +362,25 @@ return TargetLowering::getPointerMemTy(DL, AS); } -TargetLowering::AtomicExpansionKind -WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // We have wasm instructions for these - switch (AI->getOperation()) { - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - case AtomicRMWInst::And: - case AtomicRMWInst::Or: - case AtomicRMWInst::Xor: - case AtomicRMWInst::Xchg: - return AtomicExpansionKind::None; - default: - break; +TargetLoweringBase::AtomicExpansionKind +WebAssemblyTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // We have wasm instructions for these + switch (AI->getOperation()) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: + return AtomicExpansionKind::None; + default: + break; + } + return AtomicExpansionKind::CmpXChg; } - return AtomicExpansionKind::CmpXChg; + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1623,11 +1623,8 @@ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; + TargetLoweringBase::AtomicExpansionKind shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30472,38 +30472,78 @@ } TargetLoweringBase::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - Type *MemType = SI->getValueOperand()->getType(); +X86TargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + Type *MemType = SI->getValueOperand()->getType(); - bool NoImplicitFloatOps = - SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); - if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && - !Subtarget.useSoftFloat() && !NoImplicitFloatOps && - (Subtarget.hasSSE1() || Subtarget.hasX87())) - return AtomicExpansionKind::None; + bool NoImplicitFloatOps = + SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); + if 
(MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && + (Subtarget.hasSSE1() || Subtarget.hasX87())) + return AtomicExpansionKind::None; - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; + } -// Note: this turns large loads into lock cmpxchg8b/16b. -// TODO: In 32-bit mode, use MOVLPS when SSE1 is available? -TargetLowering::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - Type *MemType = LI->getType(); + if (auto *LI = dyn_cast(I)) { + // Note: this turns large loads into lock cmpxchg8b/16b. + // TODO: In 32-bit mode, use MOVLPS when SSE1 is available? + Type *MemType = LI->getType(); - // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we - // can use movq to do the load. If we have X87 we can load into an 80-bit - // X87 register and store it to a stack temporary. - bool NoImplicitFloatOps = - LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); - if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && - !Subtarget.useSoftFloat() && !NoImplicitFloatOps && - (Subtarget.hasSSE1() || Subtarget.hasX87())) - return AtomicExpansionKind::None; + // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we + // can use movq to do the load. If we have X87 we can load into an 80-bit + // X87 register and store it to a stack temporary. + bool NoImplicitFloatOps = + LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); + if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && + (Subtarget.hasSSE1() || Subtarget.hasX87())) + return AtomicExpansionKind::None; - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; + Type *MemType = AI->getType(); + + // If the operand is too big, we must see if cmpxchg8/16b is available + // and default to library calls otherwise. + if (MemType->getPrimitiveSizeInBits() > NativeWidth) { + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; + } + + AtomicRMWInst::BinOp Op = AI->getOperation(); + switch (Op) { + default: + llvm_unreachable("Unknown atomic operation"); + case AtomicRMWInst::Xchg: + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + // It's better to use xadd, xsub or xchg for these in all cases. + return AtomicExpansionKind::None; + case AtomicRMWInst::Or: + case AtomicRMWInst::And: + case AtomicRMWInst::Xor: + return shouldExpandLogicAtomicRMWInIR(AI); + case AtomicRMWInst::Nand: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + // These always require a non-trivial set of data operations on x86. We + // must use a cmpxchg loop. + return AtomicExpansionKind::CmpXChg; + } + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } TargetLowering::AtomicExpansionKind @@ -30565,44 +30605,6 @@ AI->eraseFromParent(); } -TargetLowering::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned NativeWidth = Subtarget.is64Bit() ? 
64 : 32; - Type *MemType = AI->getType(); - - // If the operand is too big, we must see if cmpxchg8/16b is available - // and default to library calls otherwise. - if (MemType->getPrimitiveSizeInBits() > NativeWidth) { - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; - } - - AtomicRMWInst::BinOp Op = AI->getOperation(); - switch (Op) { - default: - llvm_unreachable("Unknown atomic operation"); - case AtomicRMWInst::Xchg: - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - // It's better to use xadd, xsub or xchg for these in all cases. - return AtomicExpansionKind::None; - case AtomicRMWInst::Or: - case AtomicRMWInst::And: - case AtomicRMWInst::Xor: - return shouldExpandLogicAtomicRMWInIR(AI); - case AtomicRMWInst::Nand: - case AtomicRMWInst::Max: - case AtomicRMWInst::Min: - case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: - case AtomicRMWInst::FAdd: - case AtomicRMWInst::FSub: - // These always require a non-trivial set of data operations on x86. We must - // use a cmpxchg loop. - return AtomicExpansionKind::CmpXChg; - } -} - LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; @@ -44387,7 +44389,7 @@ // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might // get split by legalization. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST && - CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && + CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && TLI.isTypeLegal(VT.getScalarType())) { EVT ExtCondVT = VT.changeVectorElementTypeToInteger(); if (SDValue ExtCond = combineToExtendBoolVectorInReg(