Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -3806,6 +3806,13 @@
 def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
           (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
 
+def : Pat<(sext_inreg (i64 (anyext (vector_extract (v16i8 V128:$Rn),
+                                                    VectorIndexB:$idx))), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (vector_extract (v8i16 V128:$Rn),
+                                                    VectorIndexH:$idx))), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
 // Extracting i8 or i16 elements will have the zero-extend transformed to
 // an 'and' mask by type legalization since neither i8 nor i16 are legal types
 // for AArch64. Match these patterns here since UMOV already zeroes out the high
Index: test/CodeGen/AArch64/arm64-neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-copy.ll
+++ test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@
   ret i32 %tmp5
 }
 
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
 ; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
+  %tmp4 = sext i8 %tmp3 to i64
+  ret i64 %tmp4
 }
 
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
 ; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
+  %tmp4 = sext i16 %tmp3 to i64
+  ret i64 %tmp4
 }
 
 define i64 @smovx4s(<4 x i32> %tmp1) {
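
Note (not part of the patch): a minimal standalone reproducer is sketched below, mirroring the updated smovx16b test. The stated DAG shape is an assumption based on how illegal scalar types are promoted on AArch64: because i8 is not a legal type, the sext-from-i8 is expected to be legalized into (sext_inreg (anyext (vector_extract ...)), i8), which is exactly what the new SMOVvi8to64 pattern matches.

; Sketch, assuming llc -mtriple=aarch64-none-linux-gnu; expected to compile
; to a single `smov x0, v0.b[8]` plus `ret` once the new pattern applies.
define i64 @sext_byte_to_i64(<16 x i8> %v) {
  %elt = extractelement <16 x i8> %v, i32 8
  %ext = sext i8 %elt to i64
  ret i64 %ext
}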