Diff 117376

lib/CodeGen/SelectionDAG/TargetLowering.cpp

Show First 20 Lines • Show All 463 Lines • ▼ Show 20 Lines	TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
SmallVector <SDValue, 4> NewOps;		SmallVector <SDValue, 4> NewOps;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {		for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (i == OpIdx) {		if (i == OpIdx) {
NewOps.push_back(TLO.New);		NewOps.push_back(TLO.New);
continue;		continue;
}		}
NewOps.push_back(User->getOperand(i));		NewOps.push_back(User->getOperand(i));
}		}
TLO.DAG.UpdateNodeOperands(User, NewOps);		User = TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has fewer users now, so we may be able to perform additional combines		// Op has fewer users now, so we may be able to perform additional combines
// with it.		// with it.
DCI.AddToWorklist(Op.getNode());		DCI.AddToWorklist(Op.getNode());
// User's operands have been updated, so we may be able to do new combines		// User's operands have been updated, so we may be able to do new combines
// with it.		// with it.
DCI.AddToWorklist(User);		DCI.AddToWorklist(User);
return true;		return true;
}		}
▲ Show 20 Lines • Show All 3,418 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 6,479 Lines • ▼ Show 20 Lines	for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
}		}

SDLoc DL(Node);		SDLoc DL(Node);
Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,		Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
Node->getOperand(i).getValueType(),		Node->getOperand(i).getValueType(),
Node->getOperand(i)), 0));		Node->getOperand(i)), 0));
}		}

DAG.UpdateNodeOperands(Node, Ops);		Node = DAG.UpdateNodeOperands(Node, Ops);
return Node;		return Node;
		RKSimonUnsubmitted Done Reply Inline Actions return DAG.UpdateNodeOperands(Node, Ops); RKSimon: ``` return DAG.UpdateNodeOperands(Node, Ops); ```
}		}

/// \brief Fold the instructions after selecting them.		/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,		SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();		const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();		unsigned Opcode = Node->getMachineOpcode();

▲ Show 20 Lines • Show All 335 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll

This file was added.

				; RUN: (ulimit -t 5; llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s)
				arsenmUnsubmitted Done Reply Inline Actions Does this actually work? I've never seen another test use ulimit or a sub shell. 5 seconds seems high also. arsenm: Does this actually work? I've never seen another test use ulimit or a sub shell. 5 seconds…
				RKSimonUnsubmitted Done Reply Inline Actions Does this work on windows builds? RKSimon: Does this work on windows builds?
				msearlesAuthorUnsubmitted Not Done Reply Inline Actions Nope; will not work on Windows; test adjusted. msearles: Nope; will not work on Windows; test adjusted.
				msearlesAuthorUnsubmitted Not Done Reply Inline Actions Yes, it works quite nicely on Linux; it will not work on Windows (thanks Simon) as ulimit is not available. I removed the ulimit. So, potentially, there may be a compiler regression that causes this to loop infinitely; however, I think that you could say that about any test. msearles: Yes, it works quite nicely on Linux; it will not work on Windows (thanks Simon) as ulimit is…
				RKSimonUnsubmitted Done Reply Inline Actions Remove the brackets? Also, drop the prefix and use the default 'CHECK' ? RKSimon: Remove the brackets? Also, drop the prefix and use the default 'CHECK' ?
				msearlesAuthorUnsubmitted Not Done Reply Inline Actions Done and done. msearles: Done and done.

				; Check we can compile this test without an infinite loop in
				; TLI.SimplifyDemandedBits() due to failure to use return value
				; of TLO.DAG.UpdateNodeOperands()

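				; On success, the body of foo() is expected to be empty save for an s_endpgm.
				; The check below only verifies that s_endpgm is emitted, i.e. that
				; compilation finished and code was generated; it does not verify emptiness.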

				@0 = external unnamed_addr addrspace(3) global [462 x float], align 4

				declare i32 @llvm.amdgcn.workitem.id.y()
				declare i32 @llvm.amdgcn.workitem.id.x()
				declare float @llvm.fmuladd.f32(float, float, float)

				; GCN: s_endpgm
				RKSimonUnsubmitted Done Reply Inline Actions You're not testing that foo() is empty - just that it includes s_endpgm RKSimon: You're not testing that foo() is empty - just that it includes s_endpgm
				msearlesAuthorUnsubmitted Not Done Reply Inline Actions Updated the comments within the test; the purpose of the test is to verify that code was generated, not specifically that foo() is empty. msearles: Updated the comments within the test; the purpose of the test is to verify that code was…
				define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture, float) local_unnamed_addr !reqd_work_group_size !{i32 8, i32 16, i32 1} {
				%5 = tail call i32 @llvm.amdgcn.workitem.id.y()
				%6 = tail call i32 @llvm.amdgcn.workitem.id.x()
				%7 = and i32 %5, 15
				%8 = mul nuw nsw i32 %7, 21
				%9 = sub i32 %8, 0
				%10 = add i32 %9, 0
				%11 = add i32 %10, 0
				%12 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0
				br label %14

				; <label>:13:
				br i1 undef, label %39, label %40

				; <label>:14:
				br i1 false, label %.preheader, label %.loopexit145

				.loopexit145:
				br label %15

				; <label>:15:
				%16 = phi i32 [ %7, %.loopexit145 ], [ %22, %.loopexit ]
				%17 = add nsw i32 %16, -3
				%18 = mul i32 %16, 21
				br i1 undef, label %19, label %.loopexit

				; <label>:19:
				%20 = mul i32 %17, 224
				%21 = add i32 undef, %20
				br label %23

				.loopexit:
				%22 = add nuw nsw i32 %16, 16
				br i1 undef, label %15, label %28

				; <label>:23:
				%24 = phi i32 [ %6, %19 ], [ %27, %23 ]
				%25 = add i32 %24, %18
				%26 = getelementptr inbounds float, float addrspace(3)* %12, i32 %25
				store float undef, float addrspace(3)* %26, align 4
				%27 = add nuw i32 %24, 8
				br i1 undef, label %23, label %.loopexit

				; <label>:28:
				br label %33

				.preheader:
				%29 = phi i32 [ %30, %.preheader ], [ undef, %14 ]
				%30 = add nuw i32 %29, 128
				%31 = icmp ult i32 %30, 1568
				br i1 %31, label %.preheader, label %.loopexit145

				; <label>:32:
				br i1 undef, label %13, label %14

				; <label>:33:
				%34 = phi i32 [ %11, %28 ], [ undef, %33 ]
				%35 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %34
				arsenmUnsubmitted Done Reply Inline Actions Run instnamer on this test arsenm: Run instnamer on this test
				msearlesAuthorUnsubmitted Not Done Reply Inline Actions Done. msearles: Done.
				%36 = load float, float addrspace(3)* %35, align 4
				%37 = tail call float @llvm.fmuladd.f32(float %36, float undef, float undef)
				%38 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %37)
				br i1 undef, label %32, label %33

				; <label>:39:
				br label %40

				; <label>:40:
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[ TargetLowering, AMDGPU] Use the return value of UpdateNodeOperands();
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 117376

lib/CodeGen/SelectionDAG/TargetLowering.cpp

lib/Target/AMDGPU/SIISelLowering.cpp

test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ TargetLowering, AMDGPU] Use the return value of UpdateNodeOperands(); ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 117376

lib/CodeGen/SelectionDAG/TargetLowering.cpp

lib/Target/AMDGPU/SIISelLowering.cpp

test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll

[ TargetLowering, AMDGPU] Use the return value of UpdateNodeOperands();
ClosedPublic