Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Show All 18 Lines | |||||
#include "AArch64.h" | #include "AArch64.h" | ||||
#include "AArch64Subtarget.h" | #include "AArch64Subtarget.h" | ||||
#include "AArch64TargetMachine.h" | #include "AArch64TargetMachine.h" | ||||
#include "llvm/ADT/ArrayRef.h" | #include "llvm/ADT/ArrayRef.h" | ||||
#include "llvm/Analysis/TargetTransformInfo.h" | #include "llvm/Analysis/TargetTransformInfo.h" | ||||
#include "llvm/CodeGen/BasicTTIImpl.h" | #include "llvm/CodeGen/BasicTTIImpl.h" | ||||
#include "llvm/IR/Function.h" | #include "llvm/IR/Function.h" | ||||
#include "llvm/IR/Intrinsics.h" | #include "llvm/IR/Intrinsics.h" | ||||
#include "llvm/MC/SubtargetFeature.h" | |||||
#include <cstdint> | #include <cstdint> | ||||
#include <llvm/CodeGen/ComplexArithmeticPass.h> | |||||
#include <llvm/IR/IntrinsicsAArch64.h> | |||||
namespace llvm { | namespace llvm { | ||||
class APInt; | class APInt; | ||||
class Instruction; | class Instruction; | ||||
class IntrinsicInst; | class IntrinsicInst; | ||||
class Loop; | class Loop; | ||||
class SCEV; | class SCEV; | ||||
▲ Show 20 Lines • Show All 276 Lines • ▼ Show 20 Lines | public: | ||||
/// Unconditionally answers "no" for every reduction intrinsic \p II; the
/// argument is not inspected.
bool shouldExpandReduction(const IntrinsicInst *II) const {
  return false;
}
/// Returns the fixed cost value 2.
/// NOTE(review): presumably the GlobalISel rematerialisation cost of a global
/// address on this target -- confirm against the generic TTI hook.
unsigned getGISelRematGlobalCost() const {
  const unsigned RematGlobalCost = 2;
  return RematGlobalCost;
}
/// Scalable vectors are reported as supported exactly when the subtarget
/// reports SVE.
bool supportsScalableVectors() const {
  const bool HasSVE = ST->hasSVE();
  return HasSVE;
}
/// Complex-number arithmetic is reported as supported exactly when the
/// subtarget reports the complex-number feature.
bool supportsComplexNumberArithmetic() const {
  const bool HasComplxNum = ST->hasComplxNum();
  return HasComplxNum;
}
/// Select the AArch64 NEON intrinsic that implements the complex operation
/// described by candidate \p C.
///
/// \param C           Candidate to lower. A Complex_Mul candidate is rewritten
///                    in place to Complex_Mla.
/// \param IntArgCount Out-parameter: set to 3 for multiply / multiply-
///                    accumulate candidates and to 2 for add candidates. It is
///                    left untouched when the subtarget check fails or the
///                    candidate type is not handled here.
/// \returns the matching vcmla/vcadd intrinsic for the candidate's rotation,
///          or Intrinsic::not_intrinsic when the subtarget lacks NEON or the
///          complex-number feature, or when no intrinsic exists for the
///          requested type/rotation pair.
///
/// NOTE(review): C->Type and IntArgCount are written before the rotation is
/// validated, so they may have changed even when not_intrinsic is returned.
Intrinsic::ID getComplexArithmeticIntrinsic(ComplexArithmeticCandidate *C,
                                            unsigned &IntArgCount) const {
  const bool CanLower = ST->hasNEON() && ST->hasComplxNum();
  if (!CanLower)
    return Intrinsic::not_intrinsic;

  const unsigned Rotation = C->getRotation();
  const auto Kind = C->Type;

  if (Kind == ComplexArithmeticCandidate::Complex_Mul ||
      Kind == ComplexArithmeticCandidate::Complex_Mla) {
    // There is no plain complex multiply on AArch64; lower it through the
    // multiply-accumulate form instead.
    C->Type = ComplexArithmeticCandidate::Complex_Mla;
    IntArgCount = 3;
    switch (Rotation) {
    case 0:
      return Intrinsic::aarch64_neon_vcmla_rot0;
    case 90:
      return Intrinsic::aarch64_neon_vcmla_rot90;
    case 180:
      return Intrinsic::aarch64_neon_vcmla_rot180;
    case 270:
      return Intrinsic::aarch64_neon_vcmla_rot270;
    default:
      return Intrinsic::not_intrinsic;
    }
  }

  if (Kind == ComplexArithmeticCandidate::Complex_Add) {
    IntArgCount = 2;
    // Only the 90 and 270 degree rotations have a vcadd intrinsic.
    switch (Rotation) {
    case 90:
      return Intrinsic::aarch64_neon_vcadd_rot90;
    case 270:
      return Intrinsic::aarch64_neon_vcadd_rot270;
    default:
      return Intrinsic::not_intrinsic;
    }
  }

  return Intrinsic::not_intrinsic;
}
/// Check whether instruction \p I is an acceptable data source for candidate
/// \p C.
///
/// The only accepted shape is an extractvalue whose first index is 0 or 1 and
/// whose aggregate operand is a call to the aarch64.neon.ld2 intrinsic
/// returning a two-element struct in which both element types equal
/// C->getDataType(). Everything else is rejected.
///
/// \returns true only when all of the conditions above hold.
bool validateComplexCandidateDataFlow(ComplexArithmeticCandidate *C,
                                      Instruction *I) const {
  auto *Extract = dyn_cast<ExtractValueInst>(I);
  if (!Extract)
    return false;

  // Only the first extractvalue index is examined; it must select field 0 or 1.
  auto FirstIdx = Extract->getIndices()[0];
  if (FirstIdx != 0 && FirstIdx != 1)
    return false;

  auto *Call = dyn_cast<CallInst>(Extract->getOperand(0));
  if (!Call)
    return false;
  if (Call->getIntrinsicID() != Intrinsic::aarch64_neon_ld2)
    return false;

  auto *ResultTy = dyn_cast<StructType>(Call->getType());
  if (!ResultTy)
    return false;
  if (ResultTy->getNumElements() != 2)
    return false;

  Type *WantedTy = C->getDataType();
  const bool FirstMatches = ResultTy->getElementType(0) == WantedTy;
  return FirstMatches && ResultTy->getElementType(1) == WantedTy;
}
void | |||||
filterComplexArithmeticOperand(ComplexArithmeticCandidate *C, Value *V, | |||||
SmallVector<Value *, 4> &Operands, | |||||
SmallVector<Instruction *, 32> &DeadInsts) { | |||||
if (auto *EVI = dyn_cast<ExtractValueInst>(V)) { | |||||
if (auto *CI = dyn_cast<CallInst>(EVI->getOperand(0))) { | |||||
if (CI->getIntrinsicID() == Intrinsic::aarch64_neon_ld2) { | |||||
DeadInsts.push_back(CI); | |||||
DeadInsts.push_back(EVI); | |||||
IRBuilder<> B(CI); | |||||
auto *Ptr = CI->getOperand(0); | |||||
if (auto *BC = dyn_cast<BitCastInst>(Ptr)) { | |||||
DeadInsts.push_back(BC); | |||||
Ptr = BC->getOperand(0); | |||||
} | |||||
auto ContainsLoadForPtr = [Operands](Value *Ptr) { | |||||
for (auto *Op : Operands) { | |||||
if (auto *LOp = dyn_cast<LoadInst>(Op)) { | |||||
if (LOp->getOperand(0) == Ptr) | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
}; | |||||
if (!ContainsLoadForPtr(Ptr)) { | |||||
auto *Ty = C->getDataType(); | |||||
auto *LI = B.CreateLoad(Ty, Ptr); | |||||
Operands.push_back(LI); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||||
ElementCount VF) const; | ElementCount VF) const; | ||||
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||||
Optional<FastMathFlags> FMF, | Optional<FastMathFlags> FMF, | ||||
TTI::TargetCostKind CostKind); | TTI::TargetCostKind CostKind); | ||||
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, | InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, | ||||
ArrayRef<int> Mask, int Index, | ArrayRef<int> Mask, int Index, | ||||
VectorType *SubTp); | VectorType *SubTp); | ||||
/// @} | /// @} | ||||
}; | }; | ||||
} // end namespace llvm | } // end namespace llvm | ||||
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H | #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H |