Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Standalone View
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 22,026 Lines • ▼ Show 20 Lines | return Op.getOpcode() == AArch64ISD::DUP || | ||||
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) || | Op.getOperand(0).getOpcode() == AArch64ISD::DUP) || | ||||
TargetLowering::isTargetCanonicalConstantNode(Op); | TargetLowering::isTargetCanonicalConstantNode(Op); | ||||
} | } | ||||
/// Returns true when both low-level types are the same and that type is a
/// 32-bit or a 64-bit scalar; every other combination is rejected.
/// \p Opc is accepted but not consulted by this implementation.
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal( | bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal( | ||||
unsigned Opc, LLT Ty1, LLT Ty2) const { | unsigned Opc, LLT Ty1, LLT Ty2) const { | ||||
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)); | return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)); | ||||
} | } | ||||
dmgreen: Can you add some extra newlines between methods?
/// Reports whether complex deinterleaving is available on this subtarget.
/// The answer is exactly the value of Subtarget->hasComplxNum(); no other
/// feature is tested in this revision.
bool AArch64TargetLowering::isComplexDeinterleavingSupported() const { | |||||
I think ComplxNum requires NEON, so checking for the first should be enough. dmgreen: I think ComplxNum requires NEON, so checking for the first should be enough. | |||||
This I don't think needs the NEON check. dmgreen: This I don't think needs the NEON check. | |||||
return Subtarget->hasComplxNum(); | |||||
} | |||||
/// Decides whether a complex deinterleaving operation may be emitted for
/// vector type \p Ty.
///
/// The type is accepted only when all of the following hold:
///  - it is a FixedVectorType;
///  - its total width (scalar bits times element count) is 64 bits or at
///    least 128 bits;
///  - it has at most 32 elements;
///  - its scalar type and element count match one of the half, float or
///    double rules at the end of the body.
///
/// \p Operation is not consulted in this revision, so the answer depends on
/// the type alone.
///
/// NOTE(review): a width that is not a power of two can pass these checks.
/// For example 12 x float is 384 bits wide and satisfies the float rule, yet
/// halving it twice in createComplexDeinterleavingIR gives 96 bits, which
/// fails the width assert there. Confirm whether such types can reach here.
bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported( | |||||
ComplexDeinterleavingOperation Operation, Type *Ty) const { | |||||
// Anything that is not a fixed-width vector is rejected.
auto *VTy = dyn_cast<FixedVectorType>(Ty); | |||||
if (!VTy) | |||||
return false; | |||||
auto *ScalarTy = VTy->getScalarType(); | |||||
unsigned NumElements = VTy->getNumElements(); | |||||
// Total width in bits; only 64 or anything from 128 upwards is allowed.
unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements; | |||||
if (VTyWidth < 128 && VTyWidth != 64) | |||||
return false; | |||||
Formatting, here and below. dmgreen: Formatting, here and below. | |||||
// 32 is the length of SplitMask in createComplexDeinterleavingIR | |||||
if (NumElements > 32) | |||||
return false; | |||||
// Half precision is accepted only when the subtarget has full FP16.
// NOTE(review): the explicit == 4 test is implied by the modulo test.
if (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) | |||||
return NumElements == 4 || NumElements % 4 == 0; | |||||
// Float: two elements, or any multiple of four (== 4 is implied by that).
if (ScalarTy->isFloatTy()) | |||||
return NumElements == 2 || NumElements == 4 || NumElements % 4 == 0; | |||||
// Double: any even element count (== 2 is implied by the modulo test).
if (ScalarTy->isDoubleTy()) | |||||
return NumElements == 2 || NumElements % 2 == 0; | |||||
// Every other scalar type is unsupported.
return false; | |||||
} | |||||
/// Emits the IR for one complex operation, inserting new instructions with an
/// IRBuilder positioned at \p I.
///
/// The working vector type is taken from \p InputA, which must be a
/// FixedVectorType (a cast, not a dyn_cast, is used). A null \p Accumulator
/// is replaced by a floating-point zero constant of that type.
///
/// Steps, in order:
///  1. Assert that the total width is 64 bits or at least 128 bits.
///  2. If the width exceeds 128 bits, split InputA, InputB and the
///     accumulator into a lower and an upper half with shuffle masks taken
///     from a fixed table holding the indices 0 to 31, recurse on each half,
///     and join the two results with a shuffle over the first NumElements
///     indices of the same table.
///  3. For CMulPartial, select one of the four vcmla rotation intrinsics by
///     Rotation / 90 and call it with operands {Accumulator, InputB, InputA}.
///  4. For CAdd, select the vcadd intrinsic for a rotation of 90 or 270 and
///     call it with operands {InputA, InputB}; any other rotation yields
///     nullptr.
///  5. Any other operation type yields nullptr.
///
/// \returns the created value, or nullptr when no intrinsic applies.
///
/// NOTE(review): the lookup IdMap[Rotation / 90] has no bounds check, so a
/// Rotation of 360 or more would index past the four-entry table. None of the
/// four entries is not_intrinsic, so the check that follows the lookup cannot
/// fire for an in-range index.
/// NOTE(review): the results of the two recursive calls are passed straight
/// to CreateShuffleVector without a null check, although this function can
/// return nullptr (for example CAdd with a rotation other than 90 or 270).
/// NOTE(review): the width assert carries no message, and a width such as
/// 192 bits is split into 96-bit halves that fail it. Confirm that callers
/// only pass widths that halve cleanly down to 128 or 64.
Value *AArch64TargetLowering::createComplexDeinterleavingIR( | |||||
Add a message to the assert dmgreen: Add a message to the assert | |||||
Instruction *I, ComplexDeinterleavingOperation OperationType, | |||||
unsigned Rotation, Value *InputA, Value *InputB, Value *Accumulator) const { | |||||
Not Done ReplyInline ActionsFormatting, and it probably wants brackets around the || dmgreen: Formatting, and it probably wants brackets around the || | |||||
FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType()); | |||||
IRBuilder<> B(I); | |||||
if (Accumulator == nullptr) | |||||
It isn't clear to me why TyWidth can have any value greater than 128. Don't we expect it to be a multiple of something? SjoerdMeijer: It isn't clear to me why `TyWidth` can have any value greater than 128. Don't we expect it to… | |||||
We do, but it's also backed up by the if ((VTyWidth < 128 && VTyWidth != 64) || !llvm::isPowerOf2_32(VTyWidth)) above. Though changing the assert to reflect this restriction might be best, rather than assuming that it's met. NickGuy: We do, but it's also backed up by the `if ((VTyWidth < 128 && VTyWidth != 64) || !llvm… | |||||
Accumulator = ConstantFP::get(Ty, 0); | |||||
unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements(); | |||||
assert(TyWidth >= 128 || TyWidth == 64); | |||||
if (TyWidth > 128) { | |||||
int Stride = Ty->getNumElements() / 2; | |||||
const int SplitMask[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, | |||||
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, | |||||
There is quite a lot of debug message left here. I assume it will be removed in the final version? dmgreen: There is quite a lot of debug message left here. I assume it will be removed in the final… | |||||
22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; | |||||
ArrayRef<int> LowerSplitMask(&SplitMask[0], Stride); | |||||
ArrayRef<int> UpperSplitMask(&SplitMask[Stride], Stride); | |||||
Accumulator always has a value at this point, don't need to check it? SjoerdMeijer: `Accumulator` always has a value at this point, don't need to check it? | |||||
This check is needed, however the previous assignment is not. I've moved it down to where it is needed NickGuy: This check is needed, however the previous assignment is not. I've moved it down to where it is… | |||||
auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask); | |||||
auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask); | |||||
auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask); | |||||
auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask); | |||||
auto *LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask); | |||||
auto *UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask); | |||||
auto *LowerSplitInt = createComplexDeinterleavingIR( | |||||
I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc); | |||||
auto *UpperSplitInt = createComplexDeinterleavingIR( | |||||
I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc); | |||||
ArrayRef<int> JoinMask(SplitMask, Ty->getNumElements()); | |||||
return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask); | |||||
} | |||||
if (OperationType == ComplexDeinterleavingOperation::CMulPartial) { | |||||
Intrinsic::ID IntId = Intrinsic::not_intrinsic; | |||||
Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0, | |||||
Intrinsic::aarch64_neon_vcmla_rot90, | |||||
Intrinsic::aarch64_neon_vcmla_rot180, | |||||
Intrinsic::aarch64_neon_vcmla_rot270}; | |||||
unsigned IntIdx = Rotation / 90; | |||||
IntId = IdMap[IntIdx]; | |||||
if (IntId == Intrinsic::not_intrinsic) | |||||
return nullptr; | |||||
return B.CreateIntrinsic(IntId, Ty, {Accumulator, InputB, InputA}); | |||||
} | |||||
if (OperationType == ComplexDeinterleavingOperation::CAdd) { | |||||
Intrinsic::ID IntId = Intrinsic::not_intrinsic; | |||||
if (Rotation == 90) | |||||
IntId = Intrinsic::aarch64_neon_vcadd_rot90; | |||||
else if (Rotation == 270) | |||||
IntId = Intrinsic::aarch64_neon_vcadd_rot270; | |||||
if (IntId == Intrinsic::not_intrinsic) | |||||
return nullptr; | |||||
return B.CreateIntrinsic(IntId, Ty, {InputA, InputB}); | |||||
} | |||||
return nullptr; | |||||
} |
Can you add some extra newlines between methods?