This is an archive of the discontinued LLVM Phabricator instance.

[X86] In lowerVectorShuffleAsBroadcast, make peeking through CONCAT_VECTORS work correctly if we already walked through a bitcast that changed the element size.
ClosedPublic

Authored by craig.topper on Oct 29 2018, 11:27 AM.

Download Raw Diff

Details

Reviewers

Commits

rG6958b5ffa9ed: [X86] In lowerVectorShuffleAsBroadcast, make peeking through CONCAT_VECTORS…
rL345626: [X86] In lowerVectorShuffleAsBroadcast, make peeking through CONCAT_VECTORS…

Summary

The CONCAT_VECTORS case was using the original mask element count to determine how to adjust the broadcast index. But if we have already looked through a bitcast that changed the element size, the original mask size doesn't tell us anything about the concat_vectors.

This patch switches to using the concat_vectors input element count directly instead.

This caused a crash while doing experiments with -mprefer-vector-width=256 with skylake-avx512 on some benchmarks. All the types present in the crash were 256 or 128 bits wide so I'm not sure why -mprefer-vector-width=256 was relevant.

I don't currently have a reduced test case, but I'll see what I can do.

Diff Detail

Repository: rL LLVM

Event Timeline

craig.topper created this revision. · Oct 29 2018, 11:27 AM

Here's a test case that crashes with -mattr=avx2

define <8 x float> @foo(<4 x float> %x, <4 x float> %y, float %z) {
entry:
  %tmp = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x float> %tmp to <4 x i64>
  %tmp1 = extractelement <4 x i64> %bc, i32 3
  %tmp2 = bitcast i64 %tmp1 to <2 x float>
  %tmp4 = extractelement <2 x float> %tmp2, i32 1
  %tmp5 = insertelement <8 x float> undef, float %tmp4, i32 4
  %tmp6 = insertelement <8 x float> %tmp5, float %z, i32 5
  ret <8 x float> %tmp6
}

LGTM - please can you add that test to the vector-shuffle-256-v8.ll file?

This revision is now accepted and ready to land. · Oct 30 2018, 2:09 AM

Closed by commit rL345626: [X86] In lowerVectorShuffleAsBroadcast, make peeking through CONCAT_VECTORS… (authored by ctopper). · Explain Why · Oct 30 2018, 11:51 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86ISelLowering.cpp

3 lines

test/

CodeGen/

X86/

vector-shuffle-256-v8.ll

29 lines

Diff 171753

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 11,232 Lines • ▼ Show 20 Lines	case ISD::BITCAST: {
(BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)		(BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
BroadcastIdx /= (NumSrcBits / NumEltBits);		BroadcastIdx /= (NumSrcBits / NumEltBits);
else		else
break;		break;
V = VSrc;		V = VSrc;
continue;		continue;
}		}
case ISD::CONCAT_VECTORS: {		case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();		int OperandSize =
		V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(BroadcastIdx / OperandSize);		V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;		BroadcastIdx %= OperandSize;
continue;		continue;
}		}
case ISD::INSERT_SUBVECTOR: {		case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);		SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));		auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (!ConstantIdx)		if (!ConstantIdx)
▲ Show 20 Lines • Show All 30,357 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll

	Show First 20 Lines • Show All 2,842 Lines • ▼ Show 20 Lines
	; AVX512VL-FAST-NEXT: vpaddd %ymm3, %ymm2, %ymm0			; AVX512VL-FAST-NEXT: vpaddd %ymm3, %ymm2, %ymm0
	; AVX512VL-FAST-NEXT: retq			; AVX512VL-FAST-NEXT: retq
	entry:			entry:
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
	%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>			%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
	%add = add <8 x i32> %shuffle, %shuffle1			%add = add <8 x i32> %shuffle, %shuffle1
	ret <8 x i32> %add			ret <8 x i32> %add
	}			}

				; This test used to crash due to bad handling of concat_vectors after a bitcast
				; in lowerVectorShuffleAsBroadcast.
				define <8 x float> @broadcast_concat_crash(<4 x float> %x, <4 x float> %y, float %z) {
				; AVX1-LABEL: broadcast_concat_crash:
				; AVX1: # %bb.0: # %entry
				; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,3,1,1]
				; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
				; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
				; AVX1-NEXT: retq
				;
				; AVX2OR512VL-LABEL: broadcast_concat_crash:
				; AVX2OR512VL: # %bb.0: # %entry
				; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
				; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,2,3]
				; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,3,3]
				; AVX2OR512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
				; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
				; AVX2OR512VL-NEXT: retq
				entry:
				%tmp = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
				%bc = bitcast <8 x float> %tmp to <4 x i64>
				%tmp1 = extractelement <4 x i64> %bc, i32 3
				%tmp2 = bitcast i64 %tmp1 to <2 x float>
				%tmp4 = extractelement <2 x float> %tmp2, i32 1
				%tmp5 = insertelement <8 x float> undef, float %tmp4, i32 4
				%tmp6 = insertelement <8 x float> %tmp5, float %z, i32 5
				ret <8 x float> %tmp6
				}