Diff 148641

lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Show First 20 Lines • Show All 354 Lines • ▼ Show 20 Lines	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
Ops.push_back(N->getOperand(i));		Ops.push_back(N->getOperand(i));
}		}
Ops.push_back(Glue);		Ops.push_back(Glue);
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);		return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}		}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {		static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {		switch (NumVectorElts) {
case 1:		case 1:
return AMDGPU::SReg_32_XM0RegClassID;		return AMDGPU::SReg_32_XM0RegClassID;
case 2:		case 2:
return AMDGPU::SReg_64RegClassID;		return AMDGPU::SReg_64RegClassID;
case 4:		case 4:
		nhaehnle (Unsubmitted, Done): Have you run clang-format on this? It looks a bit off.
return AMDGPU::SReg_128RegClassID;		return AMDGPU::SReg_128RegClassID;
case 8:		case 8:
return AMDGPU::SReg_256RegClassID;		return AMDGPU::SReg_256RegClassID;
case 16:		case 16:
return AMDGPU::SReg_512RegClassID;		return AMDGPU::SReg_512RegClassID;
}		}

llvm_unreachable("invalid vector size");		llvm_unreachable("invalid vector size");
▲ Show 20 Lines • Show All 612 Lines • ▼ Show 20 Lines	bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// FIXME: This is broken on SI where we still need to check if the base		// FIXME: This is broken on SI where we still need to check if the base
// pointer is positive here.		// pointer is positive here.
Base = Addr;		Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);		Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);		Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
return true;		return true;
}		}


		nhaehnle (Unsubmitted, Done): Spurious whitespace change.
		static MachineSDNode *buildSMovImm64(SelectionDAG &DAG, const SDLoc &DL,
		uint64_t Val) {
		SDValue ValLo = DAG.getTargetConstant(Val & 0xffffffffU, DL, MVT::i32);
		SDValue ValHi = DAG.getTargetConstant(Val >> 32, DL, MVT::i32);
		const SDValue Ops0[] = {
		DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
		arsenm (Unsubmitted, Not Done): SReg_64_XEXEC
		arsenm (Unsubmitted, Done): Probably should also create a helper that the constant lowering can use as well since this is basically the same
		tpr (Author, Unsubmitted, Not Done): I left the SGPR_64RegClassID as that's what the existing code for a 64 bit constant load has. I have done the suggested helper func.
		SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, ValLo), 0),
		DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
		SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, ValHi), 0),
		DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
		};
		return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v2i32, Ops0);
		}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,		bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,		SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,		SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,		SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,		SDValue &GLC, SDValue &SLC,
SDValue &TFE) const {		SDValue &TFE) const {
// Subtarget prefers to use flat instruction		// Subtarget prefers to use flat instruction
if (Subtarget->useFlatForGlobal())		if (Subtarget->useFlatForGlobal())
Show All 17 Lines	if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N1 = Addr.getOperand(1);		SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);		ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

if (N0.getOpcode() == ISD::ADD) {		if (N0.getOpcode() == ISD::ADD) {
// (add (add N2, N3), C1) -> addr64		// (add (add N2, N3), C1) -> addr64
SDValue N2 = N0.getOperand(0);		SDValue N2 = N0.getOperand(0);
SDValue N3 = N0.getOperand(1);		SDValue N3 = N0.getOperand(1);
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);		Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
		if (N2->isDivergent()) {
		if (N3->isDivergent()) {
		// Both N2 and N3 are divergent. Keep the add and use N2+N3 as the
		// vaddr, and construct the resource out of 0.
		Ptr = SDValue(buildSMovImm64(*CurDAG, DL, 0), 0);
		VAddr = N0;
		} else {
		// N2 is divergent, N3 is not.
		Ptr = N3;
		VAddr = N2;
		}
		} else {
		// N2 is not divergent.
Ptr = N2;		Ptr = N2;
VAddr = N3;		VAddr = N3;
		}
} else {		} else {
// (add N0, C1) -> offset		// (add N0, C1) -> offset
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);		VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;		Ptr = N0;
}		}

if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {		if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);		Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
Show All 10 Lines	if (CurDAG->isBaseWithConstantOffset(Addr)) {
}		}
}		}

if (Addr.getOpcode() == ISD::ADD) {		if (Addr.getOpcode() == ISD::ADD) {
// (add N0, N1) -> addr64		// (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);		SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);		SDValue N1 = Addr.getOperand(1);
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);		Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

		if (N0->isDivergent()) {
		if (N1->isDivergent()) {
		// Both N0 and N1 are divergent. Use the result of the add as the
		// addr64, and construct the resource from a 0 address.
		Ptr = SDValue(buildSMovImm64(*CurDAG, DL, 0), 0);
		VAddr = Addr;
		} else {
		// N0 is divergent, N1 is not.
		Ptr = N1;
		VAddr = N0;
		}
		} else {
		// N0 is not divergent.
Ptr = N0;		Ptr = N0;
VAddr = N1;		VAddr = N1;
		}
		nhaehnle (Unsubmitted, Done): This has a lot of redundancy with the isBaseWithConstantOffset case above. Perhaps the cases can be combined?
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);		Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;		return true;
}		}

// default case -> offset		// default case -> offset
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);		VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = Addr;		Ptr = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);		Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
▲ Show 20 Lines • Show All 1,182 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll

This file was added.

				; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SICI %s

				; GCN-LABEL: {{^}}main:
				; GCN-NOT: readfirstlane
				; SICI: buffer_load_dwordx4 {{.*}} addr64

				@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]

				define amdgpu_ps float @main(i32 %arg18) {
				.entry:
				%tmp31 = sext i32 %arg18 to i64
				%tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
				%tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
				%tmp34 = extractelement <3 x float> %tmp33, i32 0
				ret float %tmp34
				}

				nhaehnle (Unsubmitted, Not Done): Could you please add a similar test-case, with a non-uniform i64 %arg18 and %offset a constant? I don't think this case is covered by tests, and I'm not sure that the code would do the right thing for that case, where I think Addr64 would also be needed.
				tpr (Author, Unsubmitted, Not Done): Good spot; that case was not covered.

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 148641

lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 148641

lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll

[AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
ClosedPublic