Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -5050,10 +5050,16 @@ if (M.size() != NumElts && M.size() != NumElts*2) return false; - // If the mask is twice as long as the result then we need to check the upper - // and lower parts of the mask + // If the mask is twice as long as the input vector then we need to check the + // upper and lower parts of the mask with a matching value for WhichResult + // FIXME: A mask with only even values will be rejected in case the first + // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only + // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) @@ -5080,7 +5086,10 @@ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) Index: test/CodeGen/ARM/vtrn.ll =================================================================== --- test/CodeGen/ARM/vtrn.ll +++ test/CodeGen/ARM/vtrn.ll @@ -371,3 +371,31 @@ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1 ret <8 x i8> %rv } + +; Negative test that should not generate a vtrn +define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) { +entry: + ; CHECK-LABEL: lower_twice_no_vtrn + ; CHECK: @ BB#0: + ; CHECK-NOT: vtrn + ; CHECK: mov pc, lr + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> + store <8 x i16> %0, <8 x i16>* %C + ret void +} + +; Negative test that should not generate a vtrn +define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) { +entry: + ; CHECK-LABEL: upper_twice_no_vtrn + ; CHECK: @ BB#0: + ; CHECK-NOT: vtrn + ; CHECK: mov pc, lr + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> + store <8 x i16> %0, <8 x i16>* %C + ret void +} Index: test/CodeGen/ARM/vuzp.ll =================================================================== --- test/CodeGen/ARM/vuzp.ll +++ test/CodeGen/ARM/vuzp.ll @@ -286,6 +286,18 @@ ret <4 x i32> %0 } +define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) { +entry: + ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn + ; CHECK-NOT: vtrn + ; CHECK: vuzp + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> + store <4 x i32> %0, <4 x i32>* %C + ret void +} + define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) { ; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8. ; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to Index: test/CodeGen/ARM/vzip.ll =================================================================== --- test/CodeGen/ARM/vzip.ll +++ test/CodeGen/ARM/vzip.ll @@ -295,3 +295,13 @@ ret <4 x i32> %0 } +define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) { +entry: + ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn + ; CHECK-NOT: vtrn + ; CHECK: vzip + %tmp1 = load <2 x i32>, <2 x i32>* %A + %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> + store <4 x i32> %0, <4 x i32>* %B + ret void +}