Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14469,7 +14469,7 @@ MaxIndex = std::max(MaxIndex, Index); } - NearestPow2 = PowerOf2Ceil(MaxIndex); + NearestPow2 = PowerOf2Ceil(MaxIndex + 1); if (InVT.isSimple() && (NearestPow2 > 2) && ((NumElems * 2) < NearestPow2)) { unsigned SplitSize = NearestPow2 / 2; Index: test/CodeGen/ARM/crash-on-pow2-shufflevector.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/crash-on-pow2-shufflevector.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=armv7 | FileCheck %s +; +; Ensure that we don't crash given a largeish power-of-two shufflevector index. + +%struct.desc = type { i32, [7 x i32] } + +define i32 @foo(%struct.desc* %descs, i32 %num, i32 %cw) local_unnamed_addr #0 { +; CHECK-LABEL: foo: +; CHECK: @ BB#0: @ %entry +; CHECK-NEXT: mov r1, #32 +; CHECK-NEXT: vld1.32 {d16, d17}, [r0], r1 +; CHECK-NEXT: vld1.32 {d18, d19}, [r0] +; CHECK-NEXT: vtrn.32 q8, q9 +; CHECK-NEXT: vadd.i32 d16, d16, d16 +; CHECK-NEXT: vmov.32 r0, d16[1] +; CHECK-NEXT: bx lr +entry: + %descs.vec = bitcast %struct.desc* %descs to <16 x i32>* + %wide.vec = load <16 x i32>, <16 x i32>* %descs.vec, align 4 + %strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <2 x i32> <i32 0, i32 8> + %bin.rdx20 = add <2 x i32> %strided.vec, %strided.vec + %0 = extractelement <2 x i32> %bin.rdx20, i32 1 + ret i32 %0 +} Index: test/CodeGen/X86/avx512-shuffles/partial_permute.ll =================================================================== --- test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2682,10 +2682,9 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) { ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 -; CHECK-NEXT: 
vextracti32x4 $2, %zmm0, %xmm1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp @@ -2696,11 +2695,11 @@ ; CHECK-LABEL: test_masked_8xi64_to_2xi64_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 -; CHECK-NEXT: vextracti32x4 $2, %zmm1, %xmm2 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2 +; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; CHECK-NEXT: movb $2, %al ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] +; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp @@ -2713,11 +2712,11 @@ ; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 -; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: movb $2, %al ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp @@ -4527,7 +4526,7 @@ ; CHECK-LABEL: test_masked_8xdouble_to_2xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: vmovapd (%rdi), %zmm1 -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2 ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[0] @@ -4543,7 +4542,7 @@ ; CHECK-LABEL: test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; 
CHECK-NEXT: vmovapd (%rdi), %zmm0 -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] Index: test/CodeGen/X86/vector-shuffle-512-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-512-v8.ll +++ test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -2685,17 +2685,19 @@ define <2 x double> @test_v8f64_34 (<8 x double> %v) { ; AVX512F-LABEL: test_v8f64_34: ; AVX512F: # BB#0: -; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2] +; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3] +; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: test_v8f64_34: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2] +; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3] +; AVX512F-32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512F-32-NEXT: vzeroupper ; AVX512F-32-NEXT: retl %res = shufflevector <8 x double> %v, <8 x double> undef, <2 x i32> <i32 3, i32 4>