llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
#include "llvm/IR/IntrinsicsAArch64.h" | #include "llvm/IR/IntrinsicsAArch64.h" | ||||
#include "llvm/IR/Module.h" | #include "llvm/IR/Module.h" | ||||
#include "llvm/IR/OperandTraits.h" | #include "llvm/IR/OperandTraits.h" | ||||
#include "llvm/IR/PatternMatch.h" | #include "llvm/IR/PatternMatch.h" | ||||
#include "llvm/IR/Type.h" | #include "llvm/IR/Type.h" | ||||
#include "llvm/IR/Use.h" | #include "llvm/IR/Use.h" | ||||
#include "llvm/IR/Value.h" | #include "llvm/IR/Value.h" | ||||
#include "llvm/MC/MCRegisterInfo.h" | #include "llvm/MC/MCRegisterInfo.h" | ||||
#include "llvm/Support/AtomicOrdering.h" | |||||
#include "llvm/Support/Casting.h" | #include "llvm/Support/Casting.h" | ||||
#include "llvm/Support/CodeGen.h" | #include "llvm/Support/CodeGen.h" | ||||
#include "llvm/Support/CommandLine.h" | #include "llvm/Support/CommandLine.h" | ||||
#include "llvm/Support/Compiler.h" | #include "llvm/Support/Compiler.h" | ||||
#include "llvm/Support/Debug.h" | #include "llvm/Support/Debug.h" | ||||
#include "llvm/Support/ErrorHandling.h" | #include "llvm/Support/ErrorHandling.h" | ||||
#include "llvm/Support/InstructionCost.h" | #include "llvm/Support/InstructionCost.h" | ||||
#include "llvm/Support/KnownBits.h" | #include "llvm/Support/KnownBits.h" | ||||

// (22,468 lines elided in this view)

bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
  if (auto SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16);
  return false;
}
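
// Editorial note, not part of this patch: with FEAT_LSE2, 16-byte aligned
// LDP/STP accesses are single-copy atomic, which is why the store branch
// above only demands a 128-bit value type and 16-byte alignment. The load
// branch and the subtarget feature checks of this predicate appear to sit in
// the elided context above.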

bool AArch64TargetLowering::isOpSuitableForLSE128(const Instruction *I) const {
  if (!Subtarget->hasLSE128())
    return false;
  // Only use SWPP for stores where LSE2 would require a fence. Unlike STP,
  // SWPP will clobber the two registers.
  if (const auto *SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16) &&
           (SI->getOrdering() == AtomicOrdering::Release ||
            SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
  if (const auto *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           RMW->getAlign() >= Align(16) &&
           (RMW->getOperation() == AtomicRMWInst::Xchg ||
            RMW->getOperation() == AtomicRMWInst::And ||
            RMW->getOperation() == AtomicRMWInst::Or);
  return false;
}
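
// Illustrative sketch, not part of this patch: IR that the predicate above
// accepts, built with IRBuilder (requires "llvm/IR/IRBuilder.h"; the helper
// name is hypothetical). A release or seq_cst 128-bit store qualifies, as
// does a 128-bit Xchg/And/Or RMW, matching the LSE128 swap/clear/set-pair
// instructions (SWPP/LDCLRP/LDSETP).
static void buildLSE128SuitableOps(IRBuilder<> &Builder, Value *V128,
                                   Value *Ptr) {
  // store atomic i128 %v, ptr %p release, align 16
  StoreInst *SI = Builder.CreateAlignedStore(V128, Ptr, Align(16));
  SI->setAtomic(AtomicOrdering::Release);
  // atomicrmw xchg ptr %p, i128 %v monotonic, align 16
  // (the RMW branch above does not constrain the ordering)
  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Ptr, V128, Align(16),
                          AtomicOrdering::Monotonic);
}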

bool AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const {
  if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
    return false;
  if (auto LI = dyn_cast<LoadInst>(I))
    return LI->getType()->getPrimitiveSizeInBits() == 128 &&
           LI->getAlign() >= Align(16) &&
           LI->getOrdering() == AtomicOrdering::Acquire;
  if (auto SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16) &&
           SI->getOrdering() == AtomicOrdering::Release;
  return false;
}
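
// Editorial note, not part of this patch: FEAT_LRCPC3 adds paired
// load-acquire/store-release instructions (LDIAPP/STILP), which is why only
// 128-bit acquire loads and release stores qualify here.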

bool AArch64TargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (isOpSuitableForRCPC3(I))
    return false;
  if (isOpSuitableForLSE128(I))
    return false;
  if (isOpSuitableForLDPSTP(I))
    return true;
  return false;
}
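
// Editorial note, not part of this patch: RCPC3 and LSE128 instructions carry
// their ordering semantics in the instruction itself, so no fences are needed
// around them; plain LDP/STP provides atomicity but no ordering, so it still
// requires fences.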

bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
    const Instruction *I) const {
  // Store-Release instructions only provide seq_cst guarantees when paired
  // with Load-Acquire instructions. MSVC CRT does not use these instructions
  // to implement seq_cst loads and stores, so we need additional explicit
  // fences after memory writes.
  // (16 lines elided in this view)
}

// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
  if (Size != 128)
    return AtomicExpansionKind::None;
  if (isOpSuitableForRCPC3(SI))
    return AtomicExpansionKind::None;
  if (isOpSuitableForLSE128(SI))
    return AtomicExpansionKind::Expand;
  if (isOpSuitableForLDPSTP(SI))
    return AtomicExpansionKind::None;
  return AtomicExpansionKind::Expand;
}
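
// Editorial note, not part of this patch: returning Expand for an
// LSE128-suitable store lets AtomicExpandPass rewrite the store as an
// atomicrmw xchg, which isOpSuitableForLSE128 accepts and which can then be
// selected to SWPP; other 128-bit stores fall back to the generic expansion.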

// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
  if (Size != 128)
    return AtomicExpansionKind::None;
  if (isOpSuitableForRCPC3(LI))
    return AtomicExpansionKind::None;
  // No LSE128 loads: LSE128 has no plain load instruction, so there is no
  // isOpSuitableForLSE128 check here.
  if (isOpSuitableForLDPSTP(LI))
    return AtomicExpansionKind::None;
  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
  // implement atomicrmw without spilling. If the target address is also on
  // the stack and close enough to the spill slot, this can lead to a
  // situation where the monitor always gets cleared and the atomic operation
  // can never succeed. So at -O0 lower this operation to a CAS loop.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
  // (1,704 more lines elided in this view)