Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6292,6 +6292,8 @@ static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || @@ -6306,6 +6308,8 @@ /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { @@ -6342,6 +6346,8 @@ /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 
0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || Index: llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - | FileCheck %s + +define void @test(i32* %p1, i32* %p2) { +; CHECK-LABEL: test: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: ret + %tmp = shufflevector <1 x i32> , <1 x i32> undef, <3 x i32> + %tmp2 = shufflevector <3 x i32> , <3 x i32> %tmp, <3 x i32> + %tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32> + %tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32> + %tmp6 = extractelement <9 x i32> %tmp4, i32 7 + %tmp8 = extractelement <9 x i32> %tmp4, i32 8 + store i32 %tmp6, i32* %p1, align 4 + store i32 %tmp8, i32* %p2, align 4 + ret void +} + +; Test case from PR41951 +define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: widen_shuffles_reduced: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 v2.4s, v0.4s, v1.4s +; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8 +; CHECK-NEXT: ret + %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> + ret <4 x i32> %s3 +}