Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -5982,8 +5982,22 @@
     }
   }
 
-  if (M.size() == NumElts*2)
+  if (M.size() == NumElts*2) {
+    // If the output is a two-register pair, we assume WhichResult to be 0
+    // for the first output reg, and 1 for the other one.
     WhichResult = 0;
+    // Check this assumption: if M[NumElts] == 0, WhichResult was 0
+    // for the second half.
+    if (M[NumElts] == 0)
+      return false;
+    // If M[0] was nonzero, WhichResult was 1 for the first half. That's ok
+    // only if all of them were undef.
+    if (M[0] != 0) {
+      for (unsigned i = 0; i < NumElts; i++)
+        if (M[i] >= 0)
+          return false;
+    }
+  }
 
   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   if (VT.is64BitVector() && EltSz == 32)
Index: test/CodeGen/ARM/vzip.ll
===================================================================
--- test/CodeGen/ARM/vzip.ll
+++ test/CodeGen/ARM/vzip.ll
@@ -282,6 +282,23 @@
   ret <8 x i16> %0
 }
 
+define <8 x i16> @vzip_lower_shufflemask_undef2(<4 x i16>* %A, <4 x i16>* %B) {
+; CHECK-LABEL: vzip_lower_shufflemask_undef2:
+; CHECK:       @ BB#0: @ %entry
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d19, [r0]
+; CHECK-NEXT:    vtrn.16 d19, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
+entry:
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %cat = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %res = shufflevector <8 x i16> %cat, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 4, i32 2, i32 6>
+  ret <8 x i16> %res
+}
+
 define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
 ; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
 ; CHECK:       @ BB#0: @ %entry