Diff 541949

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines
/// Checks the given mask, and determines whether said mask is deinterleaving.		/// Checks the given mask, and determines whether said mask is deinterleaving.
///		///
/// To be deinterleaving, a mask must increment in steps of 2, and either start		/// To be deinterleaving, a mask must increment in steps of 2, and either start
/// with 0 or 1.		/// with 0 or 1.
/// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or		/// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or
/// <1, 3, 5, 7>).		/// <1, 3, 5, 7>).
static bool isDeinterleavingMask(ArrayRef<int> Mask);		static bool isDeinterleavingMask(ArrayRef<int> Mask);

		/// Returns true if the operation is a negation of V, and it works for both
		mgabkaUnsubmitted Done Reply Inline Actions please add a 1 line comment (just to emphasise that it works with floats and integers) mgabka: please add a 1 line comment (just to emphasise that it works with floats and integers)
		/// integers and floats.
		static bool isNeg(Value *V);
		mgabkaUnsubmitted Done Reply Inline Actions please add a comment mgabka: please add a comment

		/// Returns the operand for negation operation.
		static Value getNegOperand(Value V);

namespace {		namespace {

class ComplexDeinterleavingLegacyPass : public FunctionPass {		class ComplexDeinterleavingLegacyPass : public FunctionPass {
public:		public:
static char ID;		static char ID;

ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr)		ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {		: FunctionPass(ID), TM(TM) {
Show All 30 Lines
public:		public:
ComplexDeinterleavingOperation Operation;		ComplexDeinterleavingOperation Operation;
Value *Real;		Value *Real;
Value *Imag;		Value *Imag;

// These two members are required exclusively for generating		// These two members are required exclusively for generating
// ComplexDeinterleavingOperation::Symmetric operations.		// ComplexDeinterleavingOperation::Symmetric operations.
unsigned Opcode;		unsigned Opcode;
FastMathFlags Flags;		std::optional<FastMathFlags> Flags;

ComplexDeinterleavingRotation Rotation =		ComplexDeinterleavingRotation Rotation =
ComplexDeinterleavingRotation::Rotation_0;		ComplexDeinterleavingRotation::Rotation_0;
SmallVector<RawNodePtr> Operands;		SmallVector<RawNodePtr> Operands;
Value *ReplacementNode = nullptr;		Value *ReplacementNode = nullptr;

void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }		void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }

▲ Show 20 Lines • Show All 170 Lines • ▼ Show 20 Lines	private:

NodePtr identifyNode(Value R, Value I);		NodePtr identifyNode(Value R, Value I);

/// Determine if a sum of complex numbers can be formed from \p RealAddends		/// Determine if a sum of complex numbers can be formed from \p RealAddends
/// and \p ImagAddends. If \p Accumulator is not null, add the result to it.		/// and \p ImagAddends. If \p Accumulator is not null, add the result to it.
/// Return nullptr if it is not possible to construct a complex number.		/// Return nullptr if it is not possible to construct a complex number.
/// \p Flags are needed to generate symmetric Add and Sub operations.		/// \p Flags are needed to generate symmetric Add and Sub operations.
NodePtr identifyAdditions(std::list<Addend> &RealAddends,		NodePtr identifyAdditions(std::list<Addend> &RealAddends,
std::list<Addend> &ImagAddends, FastMathFlags Flags,		std::list<Addend> &ImagAddends,
		std::optional<FastMathFlags> Flags,
NodePtr Accumulator);		NodePtr Accumulator);

/// Extract one addend that has both real and imaginary parts positive.		/// Extract one addend that has both real and imaginary parts positive.
NodePtr extractPositiveAddend(std::list<Addend> &RealAddends,		NodePtr extractPositiveAddend(std::list<Addend> &RealAddends,
std::list<Addend> &ImagAddends);		std::list<Addend> &ImagAddends);

/// Determine if sum of multiplications of complex numbers can be formed from		/// Determine if sum of multiplications of complex numbers can be formed from
/// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result		/// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines	static bool isDeinterleavingMask(ArrayRef<int> Mask) {
for (int Idx = 1; Idx < HalfNumElements; ++Idx) {		for (int Idx = 1; Idx < HalfNumElements; ++Idx) {
if (Mask[Idx] != (Idx * 2) + Offset)		if (Mask[Idx] != (Idx * 2) + Offset)
return false;		return false;
}		}

return true;		return true;
}		}

		bool isNeg(Value *V) {
		return match(V, m_FNeg(m_Value())) \|\| match(V, m_Neg(m_Value()));
		}

		Value getNegOperand(Value V) {
		assert(isNeg(V));
		auto *I = cast<Instruction>(V);
		if (I->getOpcode() == Instruction::FNeg)
		return I->getOperand(0);
		mgabkaUnsubmitted Done Reply Inline Actions I think you can use the m_Neg pattern matcher here; the documentation for it says: Matches a "Neg" as 'sub 0, V' mgabka: I think you can use the m_Neg pattern matcher here; the documentation for it says: Matches a "Neg" as…

		return I->getOperand(1);
		}

bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {		bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
ComplexDeinterleavingGraph Graph(TL, TLI);		ComplexDeinterleavingGraph Graph(TL, TLI);
if (Graph.collectPotentialReductions(B))		if (Graph.collectPotentialReductions(B))
Graph.identifyReductionNodes();		Graph.identifyReductionNodes();

for (auto &I : *B)		for (auto &I : *B)
Graph.identifyNodes(&I);		Graph.identifyNodes(&I);

Show All 12 Lines	ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << Real << " / " << Imag		LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << Real << " / " << Imag
<< "\n");		<< "\n");

if (!Real->hasOneUse() \|\| !Imag->hasOneUse()) {		if (!Real->hasOneUse() \|\| !Imag->hasOneUse()) {
LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n");		LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n");
return nullptr;		return nullptr;
}		}

if (Real->getOpcode() != Instruction::FMul \|\|		if ((Real->getOpcode() != Instruction::FMul &&
Imag->getOpcode() != Instruction::FMul) {		Real->getOpcode() != Instruction::Mul) \|\|
LLVM_DEBUG(dbgs() << " - Real or imaginary instruction is not fmul\n");		(Imag->getOpcode() != Instruction::FMul &&
		Imag->getOpcode() != Instruction::Mul)) {
		LLVM_DEBUG(
		dbgs() << " - Real or imaginary instruction is not fmul or mul\n");
return nullptr;		return nullptr;
}		}

Value *R0 = Real->getOperand(0);		Value *R0 = Real->getOperand(0);
Value *R1 = Real->getOperand(1);		Value *R1 = Real->getOperand(1);
Value *I0 = Imag->getOperand(0);		Value *I0 = Imag->getOperand(0);
Value *I1 = Imag->getOperand(1);		Value *I1 = Imag->getOperand(1);

// A +/+ has a rotation of 0. If any of the operands are fneg, we flip the		// A +/+ has a rotation of 0. If any of the operands are fneg, we flip the
// rotations and use the operand.		// rotations and use the operand.
unsigned Negs = 0;		unsigned Negs = 0;
Value *Op;		Value *Op;
if (match(R0, m_Neg(m_Value(Op)))) {		if (match(R0, m_Neg(m_Value(Op)))) {
Negs \|= 1;		Negs \|= 1;
R0 = Op;		R0 = Op;
} else if (match(R1, m_Neg(m_Value(Op)))) {		} else if (match(R1, m_Neg(m_Value(Op)))) {
Negs \|= 1;		Negs \|= 1;
R1 = Op;		R1 = Op;
}		}

if (match(I0, m_Neg(m_Value(Op)))) {		if (isNeg(I0)) {
Negs \|= 2;		Negs \|= 2;
Negs ^= 1;		Negs ^= 1;
I0 = Op;		I0 = Op;
} else if (match(I1, m_Neg(m_Value(Op)))) {		} else if (match(I1, m_Neg(m_Value(Op)))) {
Negs \|= 2;		Negs \|= 2;
Negs ^= 1;		Negs ^= 1;
I1 = Op;		I1 = Op;
}		}
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,		ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
Instruction *Imag) {		Instruction *Imag) {
LLVM_DEBUG(dbgs() << "identifyPartialMul " << Real << " / " << Imag		LLVM_DEBUG(dbgs() << "identifyPartialMul " << Real << " / " << Imag
<< "\n");		<< "\n");
// Determine rotation		// Determine rotation
		auto IsAdd = [](unsigned Op) {
		return Op == Instruction::FAdd \|\| Op == Instruction::Add;
		};
		auto IsSub = [](unsigned Op) {
		return Op == Instruction::FSub \|\| Op == Instruction::Sub;
		};
ComplexDeinterleavingRotation Rotation;		ComplexDeinterleavingRotation Rotation;
if (Real->getOpcode() == Instruction::FAdd &&		if (IsAdd(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Imag->getOpcode() == Instruction::FAdd)
Rotation = ComplexDeinterleavingRotation::Rotation_0;		Rotation = ComplexDeinterleavingRotation::Rotation_0;
else if (Real->getOpcode() == Instruction::FSub &&		else if (IsSub(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Imag->getOpcode() == Instruction::FAdd)
Rotation = ComplexDeinterleavingRotation::Rotation_90;		Rotation = ComplexDeinterleavingRotation::Rotation_90;
else if (Real->getOpcode() == Instruction::FSub &&		else if (IsSub(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Imag->getOpcode() == Instruction::FSub)
Rotation = ComplexDeinterleavingRotation::Rotation_180;		Rotation = ComplexDeinterleavingRotation::Rotation_180;
else if (Real->getOpcode() == Instruction::FAdd &&		else if (IsAdd(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Imag->getOpcode() == Instruction::FSub)
Rotation = ComplexDeinterleavingRotation::Rotation_270;		Rotation = ComplexDeinterleavingRotation::Rotation_270;
else {		else {
LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");		LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");
return nullptr;		return nullptr;
}		}

if (!Real->getFastMathFlags().allowContract() \|\|		if (isa<FPMathOperator>(Real) &&
!Imag->getFastMathFlags().allowContract()) {		(!Real->getFastMathFlags().allowContract() \|\|
		!Imag->getFastMathFlags().allowContract())) {
LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");		LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");
return nullptr;		return nullptr;
}		}

Value *CR = Real->getOperand(0);		Value *CR = Real->getOperand(0);
Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1));		Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1));
if (!RealMulI)		if (!RealMulI)
return nullptr;		return nullptr;
▲ Show 20 Lines • Show All 146 Lines • ▼ Show 20 Lines
}		}

static bool isInstructionPotentiallySymmetric(Instruction *I) {		static bool isInstructionPotentiallySymmetric(Instruction *I) {
switch (I->getOpcode()) {		switch (I->getOpcode()) {
case Instruction::FAdd:		case Instruction::FAdd:
case Instruction::FSub:		case Instruction::FSub:
case Instruction::FMul:		case Instruction::FMul:
case Instruction::FNeg:		case Instruction::FNeg:
		case Instruction::Add:
		case Instruction::Sub:
		case Instruction::Mul:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,		ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	ComplexDeinterleavingGraph::identifyNode(Value R, Value I) {

LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n");		LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n");
return nullptr;		return nullptr;
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real,		ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real,
Instruction *Imag) {		Instruction *Imag) {
		auto IsOperationSupported = [](unsigned Opcode) -> bool {
		mgabkaUnsubmitted Done Reply Inline Actions probably should start with capital S mgabka: probably should start with capital S
		return Opcode == Instruction::FAdd \|\| Opcode == Instruction::FSub \|\|
		Opcode == Instruction::FNeg \|\| Opcode == Instruction::Add \|\|
		Opcode == Instruction::Sub;
		};

if ((Real->getOpcode() != Instruction::FAdd &&		if (!IsOperationSupported(Real->getOpcode()) \|\|
Real->getOpcode() != Instruction::FSub &&		!IsOperationSupported(Imag->getOpcode()))
Real->getOpcode() != Instruction::FNeg) \|\|
(Imag->getOpcode() != Instruction::FAdd &&
Imag->getOpcode() != Instruction::FSub &&
Imag->getOpcode() != Instruction::FNeg))
return nullptr;		return nullptr;

		std::optional<FastMathFlags> Flags;
		if (isa<FPMathOperator>(Real)) {
if (Real->getFastMathFlags() != Imag->getFastMathFlags()) {		if (Real->getFastMathFlags() != Imag->getFastMathFlags()) {
		mgabkaUnsubmitted Done Reply Inline Actions looks like it can be deleted as we have an assignment after the check below. mgabka: looks like it can be deleted as we have an assignment after the check below.
LLVM_DEBUG(		LLVM_DEBUG(dbgs() << "The flags in Real and Imaginary instructions are "
dbgs()		"not identical\n");
<< "The flags in Real and Imaginary instructions are not identical\n");
return nullptr;		return nullptr;
}		}

FastMathFlags Flags = Real->getFastMathFlags();		Flags = Real->getFastMathFlags();
if (!Flags.allowReassoc()) {		if (!Flags->allowReassoc()) {
LLVM_DEBUG(		LLVM_DEBUG(
dbgs() << "the 'Reassoc' attribute is missing in the FastMath flags\n");		dbgs()
		<< "the 'Reassoc' attribute is missing in the FastMath flags\n");
return nullptr;		return nullptr;
}		}
		mgabkaUnsubmitted Done Reply Inline Actions Wouldn't it be clearer to guard it with if (isa<FPMathOperator>(Real))? On the same front, do we need to have a check somewhere that Real and Imag are of the same type, i.e. both fp or both integer? mgabka: Wouldn't it be clearer to guard it with if (isa<FPMathOperator>(Real))? On the same front, do we…
		igor.kirillovAuthorUnsubmitted Done Reply Inline Actions By design, they should not have different types, but after reading your comment, I realised I missed a check inside identifyReductions. So, I created a patch to fix that: https://reviews.llvm.org/D153862 igor.kirillov: By design, they should not have different types, but after reading your comment, I realised I…
		}

// Collect multiplications and addend instructions from the given instruction		// Collect multiplications and addend instructions from the given instruction
// while traversing its operands. Additionally, verify that all instructions		// while traversing its operands. Additionally, verify that all instructions
// have the same fast math flags.		// have the same fast math flags.
auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls,		auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls,
std::list<Addend> &Addends) -> bool {		std::list<Addend> &Addends) -> bool {
SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}};		SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}};
SmallPtrSet<Value *, 8> Visited;		SmallPtrSet<Value *, 8> Visited;
Show All 15 Lines	while (!Worklist.empty()) {
// the latter case, we will attempt to separately identify the complex		// the latter case, we will attempt to separately identify the complex
// operation from here in order to create a shared		// operation from here in order to create a shared
// ComplexDeinterleavingCompositeNode.		// ComplexDeinterleavingCompositeNode.
if (I != Insn && I->getNumUses() > 1) {		if (I != Insn && I->getNumUses() > 1) {
LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n");		LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n");
Addends.emplace_back(I, IsPositive);		Addends.emplace_back(I, IsPositive);
continue;		continue;
}		}
		switch (I->getOpcode()) {
if (I->getOpcode() == Instruction::FAdd) {		case Instruction::FAdd:
		case Instruction::Add:
Worklist.emplace_back(I->getOperand(1), IsPositive);		Worklist.emplace_back(I->getOperand(1), IsPositive);
Worklist.emplace_back(I->getOperand(0), IsPositive);		Worklist.emplace_back(I->getOperand(0), IsPositive);
} else if (I->getOpcode() == Instruction::FSub) {		break;
		case Instruction::FSub:
		Worklist.emplace_back(I->getOperand(1), !IsPositive);
		Worklist.emplace_back(I->getOperand(0), IsPositive);
		break;
		case Instruction::Sub:
		if (isNeg(I)) {
		Worklist.emplace_back(getNegOperand(I), !IsPositive);
		} else {
		mgabkaUnsubmitted Not Done Reply Inline Actions Looking at the diff, this looks like new functionality; maybe it is worth adding as a separate patch? mgabka: Looking at the diff, this looks like new functionality; maybe it is worth adding as a separate…
		igor.kirillovAuthorUnsubmitted Not Done Reply Inline Actions That is part of integer support functionality, namely when we have a Neg operation, for example, c = - a * b; igor.kirillov: That is part of integer support functionality, namely when we have a Neg operation, for example…
Worklist.emplace_back(I->getOperand(1), !IsPositive);		Worklist.emplace_back(I->getOperand(1), !IsPositive);
Worklist.emplace_back(I->getOperand(0), IsPositive);		Worklist.emplace_back(I->getOperand(0), IsPositive);
} else if (I->getOpcode() == Instruction::FMul) {		}
		break;
		case Instruction::FMul:
		case Instruction::Mul: {
Value A, B;		Value A, B;
if (match(I->getOperand(0), m_FNeg(m_Value(A)))) {		if (isNeg(I->getOperand(0))) {
		A = getNegOperand(I->getOperand(0));
IsPositive = !IsPositive;		IsPositive = !IsPositive;
} else {		} else {
A = I->getOperand(0);		A = I->getOperand(0);
}		}

if (match(I->getOperand(1), m_FNeg(m_Value(B)))) {		if (isNeg(I->getOperand(1))) {
		B = getNegOperand(I->getOperand(1));
IsPositive = !IsPositive;		IsPositive = !IsPositive;
} else {		} else {
B = I->getOperand(1);		B = I->getOperand(1);
}		}
Muls.push_back(Product{A, B, IsPositive});		Muls.push_back(Product{A, B, IsPositive});
} else if (I->getOpcode() == Instruction::FNeg) {		break;
		}
		case Instruction::FNeg:
Worklist.emplace_back(I->getOperand(0), !IsPositive);		Worklist.emplace_back(I->getOperand(0), !IsPositive);
} else {		break;
		default:
Addends.emplace_back(I, IsPositive);		Addends.emplace_back(I, IsPositive);
continue;		continue;
}		}

if (I->getFastMathFlags() != Flags) {		if (Flags && I->getFastMathFlags() != *Flags) {
LLVM_DEBUG(dbgs() << "The instruction's fast math flags are "		LLVM_DEBUG(dbgs() << "The instruction's fast math flags are "
"inconsistent with the root instructions' flags: "		"inconsistent with the root instructions' flags: "
<< *I << "\n");		<< *I << "\n");
return false;		return false;
}		}
}		}
return true;		return true;
};		};
▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	if (!all_of(ProcessedReal, [](bool V) { return V; }) \|\|
});		});
return nullptr;		return nullptr;
}		}

return Result;		return Result;
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyAdditions(std::list<Addend> &RealAddends,		ComplexDeinterleavingGraph::identifyAdditions(
std::list<Addend> &ImagAddends,		std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends,
FastMathFlags Flags,		std::optional<FastMathFlags> Flags, NodePtr Accumulator = nullptr) {
NodePtr Accumulator = nullptr) {
if (RealAddends.size() != ImagAddends.size())		if (RealAddends.size() != ImagAddends.size())
return nullptr;		return nullptr;

NodePtr Result;		NodePtr Result;
// If we have accumulator use it as first addend		// If we have accumulator use it as first addend
if (Accumulator)		if (Accumulator)
Result = Accumulator;		Result = Accumulator;
// Otherwise find an element with both positive real and imaginary parts.		// Otherwise find an element with both positive real and imaginary parts.
Show All 34 Lines	for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
dbgs().indent(4) << "Y: " << *I << "\n";		dbgs().indent(4) << "Y: " << *I << "\n";
dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";		dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
});		});

NodePtr TmpNode;		NodePtr TmpNode;
if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) {		if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) {
TmpNode = prepareCompositeNode(		TmpNode = prepareCompositeNode(
ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);		ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
		if (Flags) {
TmpNode->Opcode = Instruction::FAdd;		TmpNode->Opcode = Instruction::FAdd;
TmpNode->Flags = Flags;		TmpNode->Flags = *Flags;
		} else {
		TmpNode->Opcode = Instruction::Add;
		}
		mgabkaUnsubmitted Not Done Reply Inline Actions In my opinion it would make more sense to distinguish between fp and int operations based on the original instructions, not based on the extra flags we want to propagate. It would make the code more logical, i.e. if the operation is fp, propagate the flags and set the opcode to fadd. Also, I recall that this pass can match things produced by -O3; are the flags set in that case as well? How difficult would it be to change the logic to the one I propose? mgabka: In my opinion it would make more sense to distinguish between fp and int operations based on…
		igor.kirillovAuthorUnsubmitted Not Done Reply Inline Actions An FPMathOperator always has FastMathFlags; an integer operation never has them. So, I have to choose between having two variables (bool HasFastFlags, FastMathFlags Flags) or having only one variable that incorporates this information (std::optional<FastMathFlags>). I suggest using the second approach and reducing the number of entities. igor.kirillov: An FPMathOperator always has FastMathFlags; an integer operation never has them. So…
} else if (Rotation ==		} else if (Rotation ==
llvm::ComplexDeinterleavingRotation::Rotation_180) {		llvm::ComplexDeinterleavingRotation::Rotation_180) {
TmpNode = prepareCompositeNode(		TmpNode = prepareCompositeNode(
ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);		ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
		if (Flags) {
TmpNode->Opcode = Instruction::FSub;		TmpNode->Opcode = Instruction::FSub;
TmpNode->Flags = Flags;		TmpNode->Flags = *Flags;
		} else {
		TmpNode->Opcode = Instruction::Sub;
		}
} else {		} else {
TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd,		TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd,
nullptr, nullptr);		nullptr, nullptr);
TmpNode->Rotation = Rotation;		TmpNode->Rotation = Rotation;
}		}

TmpNode->addOperand(Result);		TmpNode->addOperand(Result);
TmpNode->addOperand(AddNode);		TmpNode->addOperand(AddNode);
▲ Show 20 Lines • Show All 479 Lines • ▼ Show 20 Lines	ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,
PlaceholderNode->addOperand(NodeA);		PlaceholderNode->addOperand(NodeA);
PlaceholderNode->addOperand(NodeB);		PlaceholderNode->addOperand(NodeB);
FinalInstructions.insert(MaskA);		FinalInstructions.insert(MaskA);
FinalInstructions.insert(MaskB);		FinalInstructions.insert(MaskB);
return submitCompositeNode(PlaceholderNode);		return submitCompositeNode(PlaceholderNode);
}		}

static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode,		static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode,
FastMathFlags Flags, Value *InputA,		std::optional<FastMathFlags> Flags,
Value *InputB) {		Value InputA, Value InputB) {
Value *I;		Value *I;
switch (Opcode) {		switch (Opcode) {
case Instruction::FNeg:		case Instruction::FNeg:
I = B.CreateFNeg(InputA);		I = B.CreateFNeg(InputA);
break;		break;
		mgabkaUnsubmitted Done Reply Inline Actions Wouldn't it be easier to always set FastMathFlags, if they exist, outside of this switch? mgabka: Wouldn't it be easier to always set FastMathFlags, if they exist, outside of this switch?
case Instruction::FAdd:		case Instruction::FAdd:
I = B.CreateFAdd(InputA, InputB);		I = B.CreateFAdd(InputA, InputB);
break;		break;
		case Instruction::Add:
		I = B.CreateAdd(InputA, InputB);
		break;
case Instruction::FSub:		case Instruction::FSub:
I = B.CreateFSub(InputA, InputB);		I = B.CreateFSub(InputA, InputB);
break;		break;
		case Instruction::Sub:
		I = B.CreateSub(InputA, InputB);
		break;
case Instruction::FMul:		case Instruction::FMul:
I = B.CreateFMul(InputA, InputB);		I = B.CreateFMul(InputA, InputB);
break;		break;
		case Instruction::Mul:
		I = B.CreateMul(InputA, InputB);
		break;
default:		default:
llvm_unreachable("Incorrect symmetric opcode");		llvm_unreachable("Incorrect symmetric opcode");
}		}
cast<Instruction>(I)->setFastMathFlags(Flags);		if (Flags)
		cast<Instruction>(I)->setFastMathFlags(*Flags);
return I;		return I;
}		}

Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,		Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
RawNodePtr Node) {		RawNodePtr Node) {
if (Node->ReplacementNode)		if (Node->ReplacementNode)
return Node->ReplacementNode;		return Node->ReplacementNode;

▲ Show 20 Lines • Show All 156 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 25,852 Lines • ▼ Show 20 Lines
}		}

bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(		bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {		unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) \|\| Ty1 == LLT::scalar(64));		return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) \|\| Ty1 == LLT::scalar(64));
}		}

bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {		bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {
return Subtarget->hasSVE() \|\| Subtarget->hasComplxNum();		return Subtarget->hasSVE() \|\| Subtarget->hasSVE2() \|\|
		Subtarget->hasComplxNum();
}		}

bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(		bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const {		ComplexDeinterleavingOperation Operation, Type *Ty) const {
auto *VTy = dyn_cast<VectorType>(Ty);		auto *VTy = dyn_cast<VectorType>(Ty);
if (!VTy)		if (!VTy)
return false;		return false;

Show All 9 Lines	bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
// additional 64 bits for Neon). Additionally, these vectors must have a		// additional 64 bits for Neon). Additionally, these vectors must have a
// power-of-2 size, as we later split them into the smallest supported size		// power-of-2 size, as we later split them into the smallest supported size
// and merging them back together after applying complex operation.		// and merging them back together after applying complex operation.
unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;		unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
if ((VTyWidth < 128 && (VTy->isScalableTy() \|\| VTyWidth != 64)) \|\|		if ((VTyWidth < 128 && (VTy->isScalableTy() \|\| VTyWidth != 64)) \|\|
!llvm::isPowerOf2_32(VTyWidth))		!llvm::isPowerOf2_32(VTyWidth))
return false;		return false;

		if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
		unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
		return 8 <= ScalarWidth && ScalarWidth <= 64;
		}

return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) \|\|		return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) \|\|
ScalarTy->isFloatTy() \|\| ScalarTy->isDoubleTy();		ScalarTy->isFloatTy() \|\| ScalarTy->isDoubleTy();
}		}

Value *AArch64TargetLowering::createComplexDeinterleavingIR(		Value *AArch64TargetLowering::createComplexDeinterleavingIR(
IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,		IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value InputA, Value InputB,		ComplexDeinterleavingRotation Rotation, Value InputA, Value InputB,
Value *Accumulator) const {		Value *Accumulator) const {
VectorType *Ty = cast<VectorType>(InputA->getType());		VectorType *Ty = cast<VectorType>(InputA->getType());
bool IsScalable = Ty->isScalableTy();		bool IsScalable = Ty->isScalableTy();
		bool IsInt = Ty->getElementType()->isIntegerTy();

unsigned TyWidth =		unsigned TyWidth =
Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();		Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();

assert(((TyWidth >= 128 && llvm::isPowerOf2_32(TyWidth)) \|\| TyWidth == 64) &&		assert(((TyWidth >= 128 && llvm::isPowerOf2_32(TyWidth)) \|\| TyWidth == 64) &&
"Vector type must be either 64 or a power of 2 that is at least 128");		"Vector type must be either 64 or a power of 2 that is at least 128");

if (TyWidth > 128) {		if (TyWidth > 128) {
Show All 19 Lines	if (TyWidth > 128) {

auto *Result = B.CreateInsertVector(Ty, PoisonValue::get(Ty), LowerSplitInt,		auto *Result = B.CreateInsertVector(Ty, PoisonValue::get(Ty), LowerSplitInt,
B.getInt64(0));		B.getInt64(0));
return B.CreateInsertVector(Ty, Result, UpperSplitInt, B.getInt64(Stride));		return B.CreateInsertVector(Ty, Result, UpperSplitInt, B.getInt64(Stride));
}		}

if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {		if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
if (Accumulator == nullptr)		if (Accumulator == nullptr)
Accumulator = ConstantFP::get(Ty, 0);		Accumulator = Constant::getNullValue(Ty);

if (IsScalable) {		if (IsScalable) {
auto *Mask = B.CreateVectorSplat(Ty->getElementCount(), B.getInt1(true));		if (IsInt)
		return B.CreateIntrinsic(
		Intrinsic::aarch64_sve_cmla_x, Ty,
		{Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});

		auto *Mask = B.getAllOnesMask(Ty->getElementCount());
return B.CreateIntrinsic(		return B.CreateIntrinsic(
Intrinsic::aarch64_sve_fcmla, Ty,		Intrinsic::aarch64_sve_fcmla, Ty,
		mgabkaUnsubmitted Done Reply Inline Actions The IsInt check here is redundant as it is already covered above mgabka: The IsInt check here is redundant as it is already covered above
		igor.kirillovAuthorUnsubmitted Done Reply Inline Actions Yes, that's a leftover from my first attempt to handle the case. But integer cmla doesn't accept a mask as an argument. igor.kirillov: Yes, that's a leftover from my first attempt to handle the case. But integer cmla doesn't accept…
{Mask, Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});		{Mask, Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
}		}

Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,		Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
Intrinsic::aarch64_neon_vcmla_rot90,		Intrinsic::aarch64_neon_vcmla_rot90,
Intrinsic::aarch64_neon_vcmla_rot180,		Intrinsic::aarch64_neon_vcmla_rot180,
Intrinsic::aarch64_neon_vcmla_rot270};		Intrinsic::aarch64_neon_vcmla_rot270};


return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,		return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
{Accumulator, InputB, InputA});		{Accumulator, InputB, InputA});
}		}

if (OperationType == ComplexDeinterleavingOperation::CAdd) {		if (OperationType == ComplexDeinterleavingOperation::CAdd) {
if (IsScalable) {		if (IsScalable) {
auto *Mask = B.CreateVectorSplat(Ty->getElementCount(), B.getInt1(true));
if (Rotation == ComplexDeinterleavingRotation::Rotation_90 \|\|		if (Rotation == ComplexDeinterleavingRotation::Rotation_90 \|\|
Rotation == ComplexDeinterleavingRotation::Rotation_270)		Rotation == ComplexDeinterleavingRotation::Rotation_270) {
		if (IsInt)
		return B.CreateIntrinsic(
		Intrinsic::aarch64_sve_cadd_x, Ty,
		{InputA, InputB, B.getInt32((int)Rotation * 90)});

		auto *Mask = B.getAllOnesMask(Ty->getElementCount());
return B.CreateIntrinsic(		return B.CreateIntrinsic(
		mgabkaUnsubmitted Done Reply Inline Actions I think you can just use getAllOnesValue here mgabka: I think you can just use getAllOnesValue here
Intrinsic::aarch64_sve_fcadd, Ty,		Intrinsic::aarch64_sve_fcadd, Ty,
{Mask, InputA, InputB, B.getInt32((int)Rotation * 90)});		{Mask, InputA, InputB, B.getInt32((int)Rotation * 90)});
		}
return nullptr;		return nullptr;
}		}

Intrinsic::ID IntId = Intrinsic::not_intrinsic;		Intrinsic::ID IntId = Intrinsic::not_intrinsic;
if (Rotation == ComplexDeinterleavingRotation::Rotation_90)		if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
IntId = Intrinsic::aarch64_neon_vcadd_rot90;		IntId = Intrinsic::aarch64_neon_vcadd_rot90;
else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)		else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
IntId = Intrinsic::aarch64_neon_vcadd_rot270;		IntId = Intrinsic::aarch64_neon_vcadd_rot270;
Show All 19 Lines

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - \| FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Complex add with a 90-degree rotation on <vscale x 4 x i16>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to not transform as the type's minimum size is less than 128 bits
				; (4 x i16 = 64 bits per vscale unit). The CHECK lines therefore contain no
				; cadd; they show the generic uunpk*/uzp*/sub/add/zip* lowering instead.
				define <vscale x 4 x i16> @complex_add_v4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
				mgabkaUnsubmitted Done Reply Inline Actions worth to mention why mgabka: worth to mention why
				; CHECK-LABEL: complex_add_v4i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uunpkhi z2.d, z0.s
				; CHECK-NEXT: uunpklo z0.d, z0.s
				; CHECK-NEXT: uunpkhi z3.d, z1.s
				; CHECK-NEXT: uunpklo z1.d, z1.s
				; CHECK-NEXT: uzp1 z4.d, z0.d, z2.d
				; CHECK-NEXT: uzp2 z0.d, z0.d, z2.d
				; CHECK-NEXT: uzp2 z2.d, z1.d, z3.d
				; CHECK-NEXT: uzp1 z1.d, z1.d, z3.d
				; CHECK-NEXT: sub z0.d, z1.d, z0.d
				; CHECK-NEXT: add z1.d, z2.d, z4.d
				; CHECK-NEXT: zip2 z2.d, z0.d, z1.d
				; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
				; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %a)
				%a.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %b)
				%b.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 1
				%0 = sub <vscale x 2 x i16> %b.real, %a.imag
				%1 = add <vscale x 2 x i16> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 4 x i16> @llvm.experimental.vector.interleave2.nxv4i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1)
				ret <vscale x 4 x i16> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 8 x i16>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require a single `cadd ... #90`.
				define <vscale x 8 x i16> @complex_add_v8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
				; CHECK-LABEL: complex_add_v8i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z1.h, z1.h, z0.h, #90
				; CHECK-NEXT: mov z0.d, z1.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %a)
				%a.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %b)
				%b.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 1
				%0 = sub <vscale x 4 x i16> %b.real, %a.imag
				%1 = add <vscale x 4 x i16> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 8 x i16> @llvm.experimental.vector.interleave2.nxv8i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1)
				ret <vscale x 8 x i16> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 16 x i16>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require two `cadd ... #90`
				; instructions.
				define <vscale x 16 x i16> @complex_add_v16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
				; CHECK-LABEL: complex_add_v16i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z2.h, z2.h, z0.h, #90
				; CHECK-NEXT: cadd z3.h, z3.h, z1.h, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: mov z1.d, z3.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %a)
				%a.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %b)
				%b.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 1
				%0 = sub <vscale x 8 x i16> %b.real, %a.imag
				%1 = add <vscale x 8 x i16> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1)
				ret <vscale x 16 x i16> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 32 x i16>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require four `cadd ... #90`
				; instructions.
				define <vscale x 32 x i16> @complex_add_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
				; CHECK-LABEL: complex_add_v32i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z6.h, z6.h, z2.h, #90
				; CHECK-NEXT: cadd z4.h, z4.h, z0.h, #90
				; CHECK-NEXT: cadd z5.h, z5.h, z1.h, #90
				; CHECK-NEXT: cadd z7.h, z7.h, z3.h, #90
				; CHECK-NEXT: mov z0.d, z4.d
				; CHECK-NEXT: mov z1.d, z5.d
				; CHECK-NEXT: mov z2.d, z6.d
				; CHECK-NEXT: mov z3.d, z7.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
				%a.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %b)
				%b.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 1
				%0 = sub <vscale x 16 x i16> %b.real, %a.imag
				%1 = add <vscale x 16 x i16> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 32 x i16> @llvm.experimental.vector.interleave2.nxv32i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1)
				ret <vscale x 32 x i16> %interleaved.vec
				}

				declare { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16>)
				declare <vscale x 4 x i16> @llvm.experimental.vector.interleave2.nxv4i16(<vscale x 2 x i16>, <vscale x 2 x i16>)

				declare { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
				declare <vscale x 8 x i16> @llvm.experimental.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)

				declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
				declare <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

				declare { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16>)
				declare <vscale x 32 x i16> @llvm.experimental.vector.interleave2.nxv32i16(<vscale x 16 x i16>, <vscale x 16 x i16>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - \| FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Complex multiply on <vscale x 4 x i16>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to not transform as the type's minimum size is less than 128 bits
				; (4 x i16 = 64 bits per vscale unit). The CHECK lines therefore contain no
				; cmla; they show the generic uunpk*/uzp*/mul/mla/msb/zip* lowering instead.
				define <vscale x 4 x i16> @complex_mul_v4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
				mgabkaUnsubmitted Not Done Reply Inline Actions Could you add a comment explaining why? (I thought that you are going to apply my comment to the previous test to all tests). mgabka: Could you add a comment explaining why? (I thought that you are going to apply my comment to…
				; CHECK-LABEL: complex_mul_v4i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uunpkhi z2.d, z0.s
				; CHECK-NEXT: uunpklo z0.d, z0.s
				; CHECK-NEXT: uunpkhi z3.d, z1.s
				; CHECK-NEXT: uunpklo z1.d, z1.s
				; CHECK-NEXT: uzp1 z4.d, z0.d, z2.d
				; CHECK-NEXT: uzp2 z0.d, z0.d, z2.d
				; CHECK-NEXT: uzp2 z2.d, z1.d, z3.d
				; CHECK-NEXT: uzp1 z1.d, z1.d, z3.d
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: mul z3.d, z1.d, z0.d
				; CHECK-NEXT: mul z1.d, z1.d, z4.d
				; CHECK-NEXT: mla z3.d, p0/m, z2.d, z4.d
				; CHECK-NEXT: msb z0.d, p0/m, z2.d, z1.d
				; CHECK-NEXT: zip2 z1.d, z0.d, z3.d
				; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
				; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %a)
				%a.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %b)
				%b.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 1
				%0 = mul <vscale x 2 x i16> %b.imag, %a.real
				%1 = mul <vscale x 2 x i16> %b.real, %a.imag
				%2 = add <vscale x 2 x i16> %1, %0
				%3 = mul <vscale x 2 x i16> %b.real, %a.real
				%4 = mul <vscale x 2 x i16> %a.imag, %b.imag
				%5 = sub <vscale x 2 x i16> %3, %4
				%interleaved.vec = tail call <vscale x 4 x i16> @llvm.experimental.vector.interleave2.nxv4i16(<vscale x 2 x i16> %5, <vscale x 2 x i16> %2)
				ret <vscale x 4 x i16> %interleaved.vec
				}

				; Complex multiply on <vscale x 8 x i16>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require a zeroed accumulator followed
				; by one `cmla ... #0` / `cmla ... #90` pair.
				define <vscale x 8 x i16> @complex_mul_v8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
				; CHECK-LABEL: complex_mul_v8i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z2.h, #0 // =0x0
				; CHECK-NEXT: cmla z2.h, z1.h, z0.h, #0
				; CHECK-NEXT: cmla z2.h, z1.h, z0.h, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %a)
				%a.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %b)
				%b.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 1
				%0 = mul <vscale x 4 x i16> %b.imag, %a.real
				%1 = mul <vscale x 4 x i16> %b.real, %a.imag
				%2 = add <vscale x 4 x i16> %1, %0
				%3 = mul <vscale x 4 x i16> %b.real, %a.real
				%4 = mul <vscale x 4 x i16> %a.imag, %b.imag
				%5 = sub <vscale x 4 x i16> %3, %4
				%interleaved.vec = tail call <vscale x 8 x i16> @llvm.experimental.vector.interleave2.nxv8i16(<vscale x 4 x i16> %5, <vscale x 4 x i16> %2)
				ret <vscale x 8 x i16> %interleaved.vec
				}
				; Complex multiply on <vscale x 16 x i16>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require zeroed accumulators followed
				; by two `cmla ... #0` / `cmla ... #90` pairs.
				define <vscale x 16 x i16> @complex_mul_v16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
				; CHECK-LABEL: complex_mul_v16i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z4.h, #0 // =0x0
				; CHECK-NEXT: mov z5.d, z4.d
				; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
				; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #0
				; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #90
				; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #90
				; CHECK-NEXT: mov z1.d, z4.d
				; CHECK-NEXT: mov z0.d, z5.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %a)
				%a.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %b)
				%b.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 1
				%0 = mul <vscale x 8 x i16> %b.imag, %a.real
				%1 = mul <vscale x 8 x i16> %b.real, %a.imag
				%2 = add <vscale x 8 x i16> %1, %0
				%3 = mul <vscale x 8 x i16> %b.real, %a.real
				%4 = mul <vscale x 8 x i16> %a.imag, %b.imag
				%5 = sub <vscale x 8 x i16> %3, %4
				%interleaved.vec = tail call <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16> %5, <vscale x 8 x i16> %2)
				ret <vscale x 16 x i16> %interleaved.vec
				}

				; Complex multiply on <vscale x 32 x i16>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require zeroed accumulators followed
				; by four `cmla ... #0` / `cmla ... #90` pairs.
				define <vscale x 32 x i16> @complex_mul_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
				; CHECK-LABEL: complex_mul_v32i16:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z24.h, #0 // =0x0
				; CHECK-NEXT: mov z25.d, z24.d
				; CHECK-NEXT: mov z26.d, z24.d
				; CHECK-NEXT: mov z27.d, z24.d
				; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #0
				; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #0
				; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #0
				; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #0
				; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #90
				; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #90
				; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #90
				; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #90
				; CHECK-NEXT: mov z0.d, z25.d
				; CHECK-NEXT: mov z1.d, z26.d
				; CHECK-NEXT: mov z2.d, z27.d
				; CHECK-NEXT: mov z3.d, z24.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
				%a.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %b)
				%b.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 1
				%0 = mul <vscale x 16 x i16> %b.imag, %a.real
				%1 = mul <vscale x 16 x i16> %b.real, %a.imag
				%2 = add <vscale x 16 x i16> %1, %0
				%3 = mul <vscale x 16 x i16> %b.real, %a.real
				%4 = mul <vscale x 16 x i16> %a.imag, %b.imag
				%5 = sub <vscale x 16 x i16> %3, %4
				%interleaved.vec = tail call <vscale x 32 x i16> @llvm.experimental.vector.interleave2.nxv32i16(<vscale x 16 x i16> %5, <vscale x 16 x i16> %2)
				ret <vscale x 32 x i16> %interleaved.vec
				}

				declare { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.experimental.vector.deinterleave2.nxv4i16(<vscale x 4 x i16>)
				declare <vscale x 4 x i16> @llvm.experimental.vector.interleave2.nxv4i16(<vscale x 2 x i16>, <vscale x 2 x i16>)

				declare { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
				declare <vscale x 8 x i16> @llvm.experimental.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)

				declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
				declare <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

				declare { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16>)
				declare <vscale x 32 x i16> @llvm.experimental.vector.interleave2.nxv32i16(<vscale x 16 x i16>, <vscale x 16 x i16>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - \| FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Complex add with a 90-degree rotation on <vscale x 4 x i32>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require a single `cadd ... #90`.
				define <vscale x 4 x i32> @complex_add_v4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: complex_add_v4i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z1.s, z1.s, z0.s, #90
				; CHECK-NEXT: mov z0.d, z1.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %a)
				%a.real = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %b)
				%b.real = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.deinterleaved, 1
				%0 = sub <vscale x 2 x i32> %b.real, %a.imag
				%1 = add <vscale x 2 x i32> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 4 x i32> @llvm.experimental.vector.interleave2.nxv4i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1)
				ret <vscale x 4 x i32> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 8 x i32>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require two `cadd ... #90`
				; instructions.
				define <vscale x 8 x i32> @complex_add_v8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
				; CHECK-LABEL: complex_add_v8i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z2.s, z2.s, z0.s, #90
				; CHECK-NEXT: cadd z3.s, z3.s, z1.s, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: mov z1.d, z3.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %a)
				%a.real = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %b)
				%b.real = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.deinterleaved, 1
				%0 = sub <vscale x 4 x i32> %b.real, %a.imag
				%1 = add <vscale x 4 x i32> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1)
				ret <vscale x 8 x i32> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 16 x i32>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require four `cadd ... #90`
				; instructions.
				define <vscale x 16 x i32> @complex_add_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
				; CHECK-LABEL: complex_add_v16i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z6.s, z6.s, z2.s, #90
				; CHECK-NEXT: cadd z4.s, z4.s, z0.s, #90
				; CHECK-NEXT: cadd z5.s, z5.s, z1.s, #90
				; CHECK-NEXT: cadd z7.s, z7.s, z3.s, #90
				; CHECK-NEXT: mov z0.d, z4.d
				; CHECK-NEXT: mov z1.d, z5.d
				; CHECK-NEXT: mov z2.d, z6.d
				; CHECK-NEXT: mov z3.d, z7.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
				%a.real = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %b)
				%b.real = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.deinterleaved, 1
				%0 = sub <vscale x 8 x i32> %b.real, %a.imag
				%1 = add <vscale x 8 x i32> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 16 x i32> @llvm.experimental.vector.interleave2.nxv16i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1)
				ret <vscale x 16 x i32> %interleaved.vec
				}

				declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)
				declare <vscale x 4 x i32> @llvm.experimental.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)

				declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
				declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

				declare { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32>)
				declare <vscale x 16 x i32> @llvm.experimental.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - \| FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Complex multiply on <vscale x 4 x i32>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require a zeroed accumulator followed
				; by one `cmla ... #0` / `cmla ... #90` pair.
				define <vscale x 4 x i32> @complex_mul_v4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: complex_mul_v4i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z2.s, #0 // =0x0
				; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #0
				; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %a)
				%a.real = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %b)
				%b.real = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.deinterleaved, 1
				%0 = mul <vscale x 2 x i32> %b.imag, %a.real
				%1 = mul <vscale x 2 x i32> %b.real, %a.imag
				%2 = add <vscale x 2 x i32> %1, %0
				%3 = mul <vscale x 2 x i32> %b.real, %a.real
				%4 = mul <vscale x 2 x i32> %a.imag, %b.imag
				%5 = sub <vscale x 2 x i32> %3, %4
				%interleaved.vec = tail call <vscale x 4 x i32> @llvm.experimental.vector.interleave2.nxv4i32(<vscale x 2 x i32> %5, <vscale x 2 x i32> %2)
				ret <vscale x 4 x i32> %interleaved.vec
				}

				; Complex multiply on <vscale x 8 x i32>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require zeroed accumulators followed
				; by two `cmla ... #0` / `cmla ... #90` pairs.
				define <vscale x 8 x i32> @complex_mul_v8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
				; CHECK-LABEL: complex_mul_v8i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z4.s, #0 // =0x0
				; CHECK-NEXT: mov z5.d, z4.d
				; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
				; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #0
				; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
				; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #90
				; CHECK-NEXT: mov z1.d, z4.d
				; CHECK-NEXT: mov z0.d, z5.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %a)
				%a.real = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %b)
				%b.real = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.deinterleaved, 1
				%0 = mul <vscale x 4 x i32> %b.imag, %a.real
				%1 = mul <vscale x 4 x i32> %b.real, %a.imag
				%2 = add <vscale x 4 x i32> %1, %0
				%3 = mul <vscale x 4 x i32> %b.real, %a.real
				%4 = mul <vscale x 4 x i32> %a.imag, %b.imag
				%5 = sub <vscale x 4 x i32> %3, %4
				%interleaved.vec = tail call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %5, <vscale x 4 x i32> %2)
				ret <vscale x 8 x i32> %interleaved.vec
				}

				; Complex multiply on <vscale x 16 x i32>:
				;   result.real = b.real * a.real - a.imag * b.imag
				;   result.imag = b.real * a.imag + b.imag * a.real
				; Expected to transform: the CHECK lines require zeroed accumulators followed
				; by four `cmla ... #0` / `cmla ... #90` pairs.
				define <vscale x 16 x i32> @complex_mul_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
				; CHECK-LABEL: complex_mul_v16i32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z24.s, #0 // =0x0
				; CHECK-NEXT: mov z25.d, z24.d
				; CHECK-NEXT: mov z26.d, z24.d
				; CHECK-NEXT: mov z27.d, z24.d
				; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #0
				; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #0
				; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0
				; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
				; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #90
				; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #90
				; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #90
				; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
				; CHECK-NEXT: mov z0.d, z25.d
				; CHECK-NEXT: mov z1.d, z26.d
				; CHECK-NEXT: mov z2.d, z27.d
				; CHECK-NEXT: mov z3.d, z24.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
				%a.real = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %b)
				%b.real = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.deinterleaved, 1
				%0 = mul <vscale x 8 x i32> %b.imag, %a.real
				%1 = mul <vscale x 8 x i32> %b.real, %a.imag
				%2 = add <vscale x 8 x i32> %1, %0
				%3 = mul <vscale x 8 x i32> %b.real, %a.real
				%4 = mul <vscale x 8 x i32> %a.imag, %b.imag
				%5 = sub <vscale x 8 x i32> %3, %4
				%interleaved.vec = tail call <vscale x 16 x i32> @llvm.experimental.vector.interleave2.nxv16i32(<vscale x 8 x i32> %5, <vscale x 8 x i32> %2)
				ret <vscale x 16 x i32> %interleaved.vec
				}

				declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)
				declare <vscale x 4 x i32> @llvm.experimental.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)

				declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
				declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

				declare { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32>)
				declare <vscale x 16 x i32> @llvm.experimental.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - \| FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Complex add with a 90-degree rotation on <vscale x 2 x i64>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require a single `cadd ... #90`.
				define <vscale x 2 x i64> @complex_add_v2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: complex_add_v2i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z1.d, z1.d, z0.d, #90
				; CHECK-NEXT: mov z0.d, z1.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> %a)
				%a.real = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> %b)
				%b.real = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %b.deinterleaved, 1
				%0 = sub <vscale x 1 x i64> %b.real, %a.imag
				%1 = add <vscale x 1 x i64> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 2 x i64> @llvm.experimental.vector.interleave2.nxv2i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1)
				ret <vscale x 2 x i64> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 4 x i64>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require two `cadd ... #90`
				; instructions.
				define <vscale x 4 x i64> @complex_add_v4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
				; CHECK-LABEL: complex_add_v4i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z2.d, z2.d, z0.d, #90
				; CHECK-NEXT: cadd z3.d, z3.d, z1.d, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: mov z1.d, z3.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %a)
				%a.real = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %b)
				%b.real = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %b.deinterleaved, 1
				%0 = sub <vscale x 2 x i64> %b.real, %a.imag
				%1 = add <vscale x 2 x i64> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1)
				ret <vscale x 4 x i64> %interleaved.vec
				}

				; Complex add with a 90-degree rotation on <vscale x 8 x i64>:
				;   result.real = b.real - a.imag
				;   result.imag = b.imag + a.real
				; Expected to transform: the CHECK lines require four `cadd ... #90`
				; instructions.
				define <vscale x 8 x i64> @complex_add_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
				; CHECK-LABEL: complex_add_v8i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z6.d, z6.d, z2.d, #90
				; CHECK-NEXT: cadd z4.d, z4.d, z0.d, #90
				; CHECK-NEXT: cadd z5.d, z5.d, z1.d, #90
				; CHECK-NEXT: cadd z7.d, z7.d, z3.d, #90
				; CHECK-NEXT: mov z0.d, z4.d
				; CHECK-NEXT: mov z1.d, z5.d
				; CHECK-NEXT: mov z2.d, z6.d
				; CHECK-NEXT: mov z3.d, z7.d
				; CHECK-NEXT: ret
				entry:
				%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
				%a.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %b)
				%b.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 1
				%0 = sub <vscale x 4 x i64> %b.real, %a.imag
				%1 = add <vscale x 4 x i64> %b.imag, %a.real
				%interleaved.vec = tail call <vscale x 8 x i64> @llvm.experimental.vector.interleave2.nxv8i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1)
				ret <vscale x 8 x i64> %interleaved.vec
				}

				declare { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64>)
				declare <vscale x 2 x i64> @llvm.experimental.vector.interleave2.nxv2i64(<vscale x 1 x i64>, <vscale x 1 x i64>)

				declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
				declare <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

				declare { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64>)
				declare <vscale x 8 x i64> @llvm.experimental.vector.interleave2.nxv8i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - | FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Expected to transform
				; Integer complex multiply a * b written out on deinterleaved halves:
				;   real = b.real * a.real - a.imag * b.imag
				;   imag = b.real * a.imag + b.imag * a.real
				; The assertions below expect a zeroed accumulator followed by a pair of
				; SVE2 `cmla` instructions with rotations #0 and #90.
				define <vscale x 2 x i64> @complex_mul_v2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: complex_mul_v2i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z2.d, #0 // =0x0
				; CHECK-NEXT: cmla z2.d, z1.d, z0.d, #0
				; CHECK-NEXT: cmla z2.d, z1.d, z0.d, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> %a)
				%a.real = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64> %b)
				%b.real = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 1 x i64>, <vscale x 1 x i64> } %b.deinterleaved, 1
				; Imaginary half of the product (%2).
				%0 = mul <vscale x 1 x i64> %b.imag, %a.real
				%1 = mul <vscale x 1 x i64> %b.real, %a.imag
				%2 = add <vscale x 1 x i64> %1, %0
				; Real half of the product (%5).
				%3 = mul <vscale x 1 x i64> %b.real, %a.real
				%4 = mul <vscale x 1 x i64> %a.imag, %b.imag
				%5 = sub <vscale x 1 x i64> %3, %4
				; Interleave real (%5) and imaginary (%2) halves into the result.
				%interleaved.vec = tail call <vscale x 2 x i64> @llvm.experimental.vector.interleave2.nxv2i64(<vscale x 1 x i64> %5, <vscale x 1 x i64> %2)
				ret <vscale x 2 x i64> %interleaved.vec
				}

				; Expected to transform
				; Integer complex multiply a * b on <vscale x 4 x i64>:
				;   real = b.real * a.real - a.imag * b.imag
				;   imag = b.real * a.imag + b.imag * a.real
				; The assertions below expect two zeroed accumulators, each updated by a
				; `cmla` pair with rotations #0 and #90.
				define <vscale x 4 x i64> @complex_mul_v4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
				; CHECK-LABEL: complex_mul_v4i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z4.d, #0 // =0x0
				; CHECK-NEXT: mov z5.d, z4.d
				; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
				; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #0
				; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #90
				; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #90
				; CHECK-NEXT: mov z1.d, z4.d
				; CHECK-NEXT: mov z0.d, z5.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %a)
				%a.real = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %b)
				%b.real = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %b.deinterleaved, 1
				; Imaginary half of the product (%2).
				%0 = mul <vscale x 2 x i64> %b.imag, %a.real
				%1 = mul <vscale x 2 x i64> %b.real, %a.imag
				%2 = add <vscale x 2 x i64> %1, %0
				; Real half of the product (%5).
				%3 = mul <vscale x 2 x i64> %b.real, %a.real
				%4 = mul <vscale x 2 x i64> %a.imag, %b.imag
				%5 = sub <vscale x 2 x i64> %3, %4
				; Interleave real (%5) and imaginary (%2) halves into the result.
				%interleaved.vec = tail call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %5, <vscale x 2 x i64> %2)
				ret <vscale x 4 x i64> %interleaved.vec
				}

				; Expected to transform
				; Integer complex multiply a * b on <vscale x 8 x i64>:
				;   real = b.real * a.real - a.imag * b.imag
				;   imag = b.real * a.imag + b.imag * a.real
				; The assertions below expect four zeroed accumulators, each updated by a
				; `cmla` pair with rotations #0 and #90.
				define <vscale x 8 x i64> @complex_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
				; CHECK-LABEL: complex_mul_v8i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z24.d, #0 // =0x0
				; CHECK-NEXT: mov z25.d, z24.d
				; CHECK-NEXT: mov z26.d, z24.d
				; CHECK-NEXT: mov z27.d, z24.d
				; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #0
				; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #0
				; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #0
				; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #0
				; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #90
				; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #90
				; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #90
				; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #90
				; CHECK-NEXT: mov z0.d, z25.d
				; CHECK-NEXT: mov z1.d, z26.d
				; CHECK-NEXT: mov z2.d, z27.d
				; CHECK-NEXT: mov z3.d, z24.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
				%a.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %b)
				%b.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 1
				; Imaginary half of the product (%2).
				%0 = mul <vscale x 4 x i64> %b.imag, %a.real
				%1 = mul <vscale x 4 x i64> %b.real, %a.imag
				%2 = add <vscale x 4 x i64> %1, %0
				; Real half of the product (%5).
				%3 = mul <vscale x 4 x i64> %b.real, %a.real
				%4 = mul <vscale x 4 x i64> %a.imag, %b.imag
				%5 = sub <vscale x 4 x i64> %3, %4
				; Interleave real (%5) and imaginary (%2) halves into the result.
				%interleaved.vec = tail call <vscale x 8 x i64> @llvm.experimental.vector.interleave2.nxv8i64(<vscale x 4 x i64> %5, <vscale x 4 x i64> %2)
				ret <vscale x 8 x i64> %interleaved.vec
				}

				; Expected to transform
				; Negated integer complex multiply, -(a * b), expressed through a negated
				; a.real (written as `sub zeroinitializer, %a.real`):
				;   real = b.imag * a.imag + b.real * (-a.real)
				;   imag = b.imag * (-a.real) - b.real * a.imag
				; The assertions below expect four zeroed accumulators, each updated by a
				; `cmla` pair with rotations #270 and #180.
				define <vscale x 8 x i64> @complex_minus_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
				; CHECK-LABEL: complex_minus_mul_v8i64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov z24.d, #0 // =0x0
				; CHECK-NEXT: mov z25.d, z24.d
				; CHECK-NEXT: mov z26.d, z24.d
				; CHECK-NEXT: mov z27.d, z24.d
				; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #270
				; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #270
				; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #270
				; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #270
				; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #180
				; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #180
				; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #180
				; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #180
				; CHECK-NEXT: mov z0.d, z25.d
				; CHECK-NEXT: mov z1.d, z26.d
				; CHECK-NEXT: mov z2.d, z27.d
				; CHECK-NEXT: mov z3.d, z24.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a; field 0 is used as the real part and field 1 as the
				; imaginary part.
				%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
				%a.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %a.deinterleaved, 1
				; Integer negation of a.real; %0 feeds both products that use a.real.
				%0 = sub <vscale x 4 x i64> zeroinitializer, %a.real
				%b.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %b)
				%b.real = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i64> } %b.deinterleaved, 1
				; Real half of the result (%3).
				%1 = mul <vscale x 4 x i64> %b.real, %0
				%2 = mul <vscale x 4 x i64> %b.imag, %a.imag
				%3 = add <vscale x 4 x i64> %2, %1
				; Imaginary half of the result (%6).
				%4 = mul <vscale x 4 x i64> %b.real, %a.imag
				%5 = mul <vscale x 4 x i64> %b.imag, %0
				%6 = sub <vscale x 4 x i64> %5, %4
				; Interleave real (%3) and imaginary (%6) halves into the result.
				%interleaved.vec = tail call <vscale x 8 x i64> @llvm.experimental.vector.interleave2.nxv8i64(<vscale x 4 x i64> %3, <vscale x 4 x i64> %6)
				ret <vscale x 8 x i64> %interleaved.vec
				}

				declare { <vscale x 1 x i64>, <vscale x 1 x i64> } @llvm.experimental.vector.deinterleave2.nxv2i64(<vscale x 2 x i64>)
				declare <vscale x 2 x i64> @llvm.experimental.vector.interleave2.nxv2i64(<vscale x 1 x i64>, <vscale x 1 x i64>)

				declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
				declare <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

				declare { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64>)
				declare <vscale x 8 x i64> @llvm.experimental.vector.interleave2.nxv8i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s --mattr=+sve2 -o - | FileCheck %s

				target triple = "aarch64-arm-none-eabi"

				; Expected to not transform as the type's minimum size is less than 128 bits.
				; <vscale x 8 x i8> has a minimum size of 8 * 8 = 64 bits, so the assertions
				; below expect the generic unpack/uzp/sub/add/zip sequence and no `cadd`.
				define <vscale x 8 x i8> @complex_add_v8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
				; CHECK-LABEL: complex_add_v8i8:
				; Review note (mgabka, not done): please add a comment explaining why.
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uunpkhi z2.s, z0.h
				; CHECK-NEXT: uunpklo z0.s, z0.h
				; CHECK-NEXT: uunpkhi z3.s, z1.h
				; CHECK-NEXT: uunpklo z1.s, z1.h
				; CHECK-NEXT: uzp1 z4.s, z0.s, z2.s
				; CHECK-NEXT: uzp2 z0.s, z0.s, z2.s
				; CHECK-NEXT: uzp2 z2.s, z1.s, z3.s
				; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
				; CHECK-NEXT: sub z0.s, z1.s, z0.s
				; CHECK-NEXT: add z1.s, z2.s, z4.s
				; CHECK-NEXT: zip2 z2.s, z0.s, z1.s
				; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
				; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 4 x i8>, <vscale x 4 x i8> } @llvm.experimental.vector.deinterleave2.nxv8i8(<vscale x 8 x i8> %a)
				%a.real = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i8> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i8> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 4 x i8>, <vscale x 4 x i8> } @llvm.experimental.vector.deinterleave2.nxv8i8(<vscale x 8 x i8> %b)
				%b.real = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i8> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i8> } %b.deinterleaved, 1
				; %0 is the real half of the result, %1 the imaginary half.
				%0 = sub <vscale x 4 x i8> %b.real, %a.imag
				%1 = add <vscale x 4 x i8> %b.imag, %a.real
				; Interleave the two halves back into a single vector.
				%interleaved.vec = tail call <vscale x 8 x i8> @llvm.experimental.vector.interleave2.nxv8i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1)
				ret <vscale x 8 x i8> %interleaved.vec
				}

				; Expected to transform
				; Integer complex add on <vscale x 16 x i8>: the result is built from
				; (b.real - a.imag) and (b.imag + a.real). The assertions below expect a
				; single SVE2 `cadd ... #90` on byte elements.
				define <vscale x 16 x i8> @complex_add_v16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
				; CHECK-LABEL: complex_add_v16i8:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z1.b, z1.b, z0.b, #90
				; CHECK-NEXT: mov z0.d, z1.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.experimental.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %a)
				%a.real = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.experimental.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %b)
				%b.real = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } %b.deinterleaved, 1
				; %0 is the real half of the result, %1 the imaginary half.
				%0 = sub <vscale x 8 x i8> %b.real, %a.imag
				%1 = add <vscale x 8 x i8> %b.imag, %a.real
				; Interleave the two halves back into a single vector.
				%interleaved.vec = tail call <vscale x 16 x i8> @llvm.experimental.vector.interleave2.nxv16i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1)
				ret <vscale x 16 x i8> %interleaved.vec
				}

				; Expected to transform
				; Integer complex add on <vscale x 32 x i8>: the result is built from
				; (b.real - a.imag) and (b.imag + a.real). The assertions below expect two
				; SVE2 `cadd ... #90` instructions on byte elements.
				define <vscale x 32 x i8> @complex_add_v32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
				; CHECK-LABEL: complex_add_v32i8:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: cadd z2.b, z2.b, z0.b, #90
				; CHECK-NEXT: cadd z3.b, z3.b, z1.b, #90
				; CHECK-NEXT: mov z0.d, z2.d
				; CHECK-NEXT: mov z1.d, z3.d
				; CHECK-NEXT: ret
				entry:
				; Deinterleave %a and %b; field 0 is used as the real part and field 1 as
				; the imaginary part.
				%a.deinterleaved = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %a)
				%a.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 0
				%a.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 1
				%b.deinterleaved = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %b)
				%b.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 0
				%b.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 1
				; %0 is the real half of the result, %1 the imaginary half.
				%0 = sub <vscale x 16 x i8> %b.real, %a.imag
				%1 = add <vscale x 16 x i8> %b.imag, %a.real
				; Interleave the two halves back into a single vector.
				%interleaved.vec = tail call <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1)
				ret <vscale x 32 x i8> %interleaved.vec
				}

				declare { <vscale x 4 x i8>, <vscale x 4 x i8> } @llvm.experimental.vector.deinterleave2.nxv8i8(<vscale x 8 x i8>)
				declare <vscale x 8 x i8> @llvm.experimental.vector.interleave2.nxv8i8(<vscale x 4 x i8>, <vscale x 4 x i8>)

				declare { <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.experimental.vector.deinterleave2.nxv16i8(<vscale x 16 x i8>)
				declare <vscale x 16 x i8> @llvm.experimental.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>)

				declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
				declare <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Extend ComplexDeinterleaving pass to recognise patterns using integer types
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 541949

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Extend ComplexDeinterleaving pass to recognise patterns using integer types
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 541949

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll

llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll

[CodeGen] Extend ComplexDeinterleaving pass to recognise patterns using integer types
ClosedPublic