Diff 537743

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

Show First 20 Lines • Show All 255 Lines • ▼ Show 20 Lines	private:
/// %ReductionOP, which we refer to as real and imag (or vice versa), and		/// %ReductionOP, which we refer to as real and imag (or vice versa), and
/// traverse the use-tree to detect complex operations. As this is a reduction		/// traverse the use-tree to detect complex operations. As this is a reduction
/// operation, it will eventually reach RealPHI and ImagPHI, which corresponds		/// operation, it will eventually reach RealPHI and ImagPHI, which corresponds
/// to the %ReductionOPs that we suspect to be complex.		/// to the %ReductionOPs that we suspect to be complex.
/// RealPHI and ImagPHI are used by the identifyPHINode method.		/// RealPHI and ImagPHI are used by the identifyPHINode method.
PHINode *RealPHI = nullptr;		PHINode *RealPHI = nullptr;
PHINode *ImagPHI = nullptr;		PHINode *ImagPHI = nullptr;

		/// Set this flag to true if RealPHI and ImagPHI were reached during reduction
		/// detection.
		bool PHIsFound = false;

/// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.		/// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.
/// The new PHINode corresponds to a vector of deinterleaved complex numbers.		/// The new PHINode corresponds to a vector of deinterleaved complex numbers.
/// This mapping is populated during		/// This mapping is populated during
/// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then		/// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then
/// used in the ComplexDeinterleavingOperation::ReductionOperation node		/// used in the ComplexDeinterleavingOperation::ReductionOperation node
/// replacement process.		/// replacement process.
std::map<PHINode *, PHINode *> OldToNewPHI;		std::map<PHINode *, PHINode *> OldToNewPHI;

▲ Show 20 Lines • Show All 1,142 Lines • ▼ Show 20 Lines	for (auto &PHI : B->phis()) {
auto NumUsers = 0u;		auto NumUsers = 0u;
for (auto *U : ReductionOp->users()) {		for (auto *U : ReductionOp->users()) {
++NumUsers;		++NumUsers;
if (U == &PHI)		if (U == &PHI)
continue;		continue;
FinalReduction = dyn_cast<Instruction>(U);		FinalReduction = dyn_cast<Instruction>(U);
}		}

if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B)		if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B ||
		isa<PHINode>(FinalReduction))
continue;		continue;

ReductionInfo[ReductionOp] = {&PHI, FinalReduction};		ReductionInfo[ReductionOp] = {&PHI, FinalReduction};
BackEdge = B;		BackEdge = B;
auto BackEdgeIdx = PHI.getBasicBlockIndex(B);		auto BackEdgeIdx = PHI.getBasicBlockIndex(B);
auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0;		auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0;
Incoming = PHI.getIncomingBlock(IncomingIdx);		Incoming = PHI.getIncomingBlock(IncomingIdx);
FoundPotentialReduction = true;		FoundPotentialReduction = true;
Show All 24 Lines	for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {

auto *Real = OperationInstruction[i];		auto *Real = OperationInstruction[i];
auto *Imag = OperationInstruction[j];		auto *Imag = OperationInstruction[j];
if (Real->getType() != Imag->getType())		if (Real->getType() != Imag->getType())
continue;		continue;

RealPHI = ReductionInfo[Real].first;		RealPHI = ReductionInfo[Real].first;
ImagPHI = ReductionInfo[Imag].first;		ImagPHI = ReductionInfo[Imag].first;
		PHIsFound = false;
auto Node = identifyNode(Real, Imag);		auto Node = identifyNode(Real, Imag);
if (!Node) {		if (!Node) {
std::swap(Real, Imag);		std::swap(Real, Imag);
std::swap(RealPHI, ImagPHI);		std::swap(RealPHI, ImagPHI);
Node = identifyNode(Real, Imag);		Node = identifyNode(Real, Imag);
}		}

// If a node is identified, mark its operation instructions as used to		// If a node is identified and reduction PHINode is used in the chain of
// prevent re-identification and attach the node to the real part		// operations, mark its operation instructions as used to prevent
if (Node) {		// re-identification and attach the node to the real part
		if (Node && PHIsFound) {
LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "		LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "
<< Real << " / " << Imag << "\n");		<< Real << " / " << Imag << "\n");
Processed[i] = true;		Processed[i] = true;
Processed[j] = true;		Processed[j] = true;
auto RootNode = prepareCompositeNode(		auto RootNode = prepareCompositeNode(
ComplexDeinterleavingOperation::ReductionOperation, Real, Imag);		ComplexDeinterleavingOperation::ReductionOperation, Real, Imag);
RootNode->addOperand(Node);		RootNode->addOperand(Node);
RootToNode[Real] = RootNode;		RootToNode[Real] = RootNode;
▲ Show 20 Lines • Show All 276 Lines • ▼ Show 20 Lines
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,		ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
Instruction *Imag) {		Instruction *Imag) {
if (Real != RealPHI || Imag != ImagPHI)		if (Real != RealPHI || Imag != ImagPHI)
return nullptr;		return nullptr;

		PHIsFound = true;
NodePtr PlaceholderNode = prepareCompositeNode(		NodePtr PlaceholderNode = prepareCompositeNode(
ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);		ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);
return submitCompositeNode(PlaceholderNode);		return submitCompositeNode(PlaceholderNode);
}		}

ComplexDeinterleavingGraph::NodePtr		ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,		ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,
Instruction *Imag) {		Instruction *Imag) {
▲ Show 20 Lines • Show All 221 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll

Show First 20 Lines • Show All 230 Lines • ▼ Show 20 Lines	middle.block: ; preds = %vector.body
%bin.rdx40 = fadd fast <2 x double> %11, %10		%bin.rdx40 = fadd fast <2 x double> %11, %10
%17 = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %bin.rdx40)		%17 = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %bin.rdx40)
%bin.rdx = fadd fast <2 x double> %15, %14		%bin.rdx = fadd fast <2 x double> %15, %14
%18 = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %bin.rdx)		%18 = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %bin.rdx)
%.fca.0.0.insert = insertvalue %"struct.std::complex" poison, double %17, 0, 0		%.fca.0.0.insert = insertvalue %"struct.std::complex" poison, double %17, 0, 0
%.fca.0.1.insert = insertvalue %"struct.std::complex" %.fca.0.0.insert, double %18, 0, 1		%.fca.0.1.insert = insertvalue %"struct.std::complex" %.fca.0.0.insert, double %18, 0, 1
ret %"struct.std::complex" %.fca.0.1.insert		ret %"struct.std::complex" %.fca.0.1.insert
}		}

		; The reduced bug from D153355. Shows that reduction was detected where it did not exist.
		define void @bug(i1 %exitcond.not) {
		mgabkaUnsubmitted Done Reply Inline Actions there is no attribute like that, I guess it can be removed, or if attribute was relevant for this test it needs to be added mgabka: there is no attribute like that, I guess it can be removed, or if attribute was relevant for…
		mgabkaUnsubmitted Done Reply Inline Actions I have a general comment about this test: to me it looks like it can be simplified so that it does not use the aarch64 specific intrinsics. Isn't it the case that we have only fadd here but no load/store instructions? mgabka: I have a general comment about this test, to me looks like it can be simplified and do not use…
		mgabkaUnsubmitted Done Reply Inline Actions Thanks Igor for adjusting the test, I have a few more comments/requests: 1. Could you change the function name to something more descriptive, like incorrect_reduction_pattern or something similar? 2. Remove the numbers from variable names (it will make it easier to read). mgabka: Thanks Igor for adjusting the test, I have a few more comments/requests: 1. Could you change…
		; CHECK-LABEL: bug:
		; CHECK: // %bb.0: // %entry
		; CHECK-NEXT: .LBB3_1: // %for.body
		; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
		; CHECK-NEXT: tbz w0, #0, .LBB3_1
		; CHECK-NEXT: // %bb.2: // %for.end.loopexit
		; CHECK-NEXT: ret
		entry:
		br label %for.body

		for.body: ; preds = %for.body, %entry
		%vec_r.0190 = phi <4 x float> [ zeroinitializer, %entry ], [ %lane62, %for.body ]
		%vec_b.0188 = phi <4 x float> [ zeroinitializer, %entry ], [ %lane76, %for.body ]
		%add.i175 = fadd <4 x float> %vec_b.0188, %vec_r.0190
		%lane62 = shufflevector <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
		%lane76 = shufflevector <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
		br i1 %exitcond.not, label %for.end.loopexit, label %for.body

		for.end.loopexit: ; preds = %for.body
		%mul.i177 = fadd <4 x float> %lane62, %add.i175
		%mul.i179 = fadd <4 x float> %lane76, %add.i175
		ret void
		}

declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)		declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Fix incorrectly detected reduction bug in ComplexDeinterleaving pass
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 537743

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Fix incorrectly detected reduction bug in ComplexDeinterleaving pass
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 537743

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll

[CodeGen] Fix incorrectly detected reduction bug in ComplexDeinterleaving pass
ClosedPublic