Diff 194299

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Show First 20 Lines • Show All 4,057 Lines • ▼ Show 20 Lines	if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
if (TLI.isTypeLegal(NewVT)) {		if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);		SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(		return DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,		ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));		DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}		}
}		}

		if (VT.isVector()) {
		efriedmaUnsubmitted Done Reply Inline Actions So if I'm following this correctly, this takes a cast like `<12 x i8>` -> `<3 x i32>`, and turns it into `<16 x i8>` -> `<4 x i32>`? That makes sense, but please add a comment describing it. efriedma: So if I'm following this correctly, this takes a cast like `<12 x i8>` -> `<3 x i32>`, and…
		EVT EltVT = VT.getVectorElementType();
		unsigned EltSize = EltVT.getSizeInBits();
		if (InWidenSize % EltSize == 0) {
		unsigned NewNumElts = InWidenSize / EltSize;
		EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
		if (TLI.isTypeLegal(NewVT)) {
		SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
		return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
		DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
		}
		}
		}

return CreateStackStoreLoad(InOp, VT);		return CreateStackStoreLoad(InOp, VT);
}		}

SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {		SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();		EVT EltVT = VT.getVectorElementType();
EVT InVT = N->getOperand(0).getValueType();		EVT InVT = N->getOperand(0).getValueType();
SDLoc dl(N);		SDLoc dl(N);
▲ Show 20 Lines • Show All 735 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll

This file was added.

				; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 <%s -stop-after=amdgpu-isel | FileCheck -check-prefix=GCN %s

				; GCN-LABEL: body:
				; GCN-NOT: %stack
				arsenmUnsubmitted Done Reply Inline Actions I wouldn’t trust this to check this, a generated check would be better arsenm: I wouldn’t trust this to check this, a generated check would be better
				tprAuthorUnsubmitted Done Reply Inline Actions Not really sure what you're suggesting, but I hope this is better. tpr: Not really sure what you're suggesting, but I hope this is better.
				nhaehnleUnsubmitted Not Done Reply Inline Actions Maybe you can use `utils/update_llc_test_checks.py`? nhaehnle: Maybe you can use `utils/update_llc_test_checks.py`?
				tprAuthorUnsubmitted Done Reply Inline Actions You mean have a check line for each line of IR output in the function? Do you think that would be better than the negative check for storing to stack? tpr: You mean have a check line for each line of IR output in the function? Do you think that would…
				nhaehnleUnsubmitted Done Reply Inline Actions Yes, I do think so. Having the auto-generated assertions means that we catch other things going wrong, and it's easy enough to update them for benign changes. I realize that you actually need update_mir_test_checks in this case due to the -stop-after, and the script is sensitive to the fact that there's no space between the `<` and the `%s`. nhaehnle: Yes, I do think so. Having the auto-generated assertions means that we catch other things going…
				tprAuthorUnsubmitted Done Reply Inline Actions Thanks Nicolai. Now done. tpr: Thanks Nicolai. Now done.

				; Test entry point: builds a <12 x i8> -> <3 x i32> bitcast whose three lanes
				; are all consumed. The GCN-NOT line in this file expects the output after
				; the "body:" label to contain no "%stack".
				; NOTE(review): presumably this exercises the vector-result bitcast path in
				; the type legalizer (LegalizeVectorTypes.cpp) - confirm against that change.
				define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg noalias dereferenceable(18446744073709551615) %arg) {
				main_body:
				; Load three floats, pad to four lanes with an undef lane, and view the
				; result as sixteen bytes.
				%tmp25 = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
				%tmp26 = shufflevector <3 x float> %tmp25, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
				%tmp27 = bitcast <4 x float> %tmp26 to <16 x i8>
				; Keep only the first twelve bytes and reinterpret them as three i32 lanes.
				%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
				%tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
				; Extract each of the three lanes and bit-reverse it.
				%tmp30 = extractelement <3 x i32> %tmp29, i32 0
				%tmp31 = extractelement <3 x i32> %tmp29, i32 1
				%tmp32 = extractelement <3 x i32> %tmp29, i32 2
				; NOTE(review): attribute groups #5 and #3 are referenced below, but no
				; definition for them is visible in this file - confirm that is intended.
				%tmp33 = call i32 @llvm.bitreverse.i32(i32 %tmp30) #5
				%tmp34 = call i32 @llvm.bitreverse.i32(i32 %tmp31) #5
				%tmp35 = call i32 @llvm.bitreverse.i32(i32 %tmp32) #5
				; Pack the first two reversed lanes into a <2 x i32>.
				%tmp36 = insertelement <2 x i32> undef, i32 %tmp33, i32 0
				%tmp37 = insertelement <2 x i32> %tmp36, i32 %tmp34, i32 1
				; Load a <4 x i32> from index 16 of %arg.
				; NOTE(review): %tmp40 is not used anywhere else in this function.
				%tmp38 = getelementptr [0 x i8], [0 x i8] addrspace(6)* %arg, i32 0, i32 16
				%tmp39 = bitcast i8 addrspace(6)* %tmp38 to <4 x i32> addrspace(6)*
				%tmp40 = load <4 x i32>, <4 x i32> addrspace(6)* %tmp39, align 16
				; Store lanes 0-1 as a <2 x float> and lane 2 as a scalar float.
				%tmp41 = bitcast <2 x i32> %tmp37 to <2 x float>
				call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %tmp41, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
				%tmp43 = bitcast i32 %tmp35 to float
				call void @llvm.amdgcn.raw.buffer.store.f32(float %tmp43, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
				ret void
				}

				declare i32 @llvm.bitreverse.i32(i32)
				declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
				declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
				declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg)

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Fixed de-optimization of legalize subvector extract
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 194299

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Fixed de-optimization of legalize subvector extract
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 194299

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll

[CodeGen] Fixed de-optimization of legalize subvector extract
ClosedPublic