diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10046,6 +10046,10 @@
     Opcode = AArch64ISD::SQSHLU_I;
     IsRightShift = false;
     break;
+  case Intrinsic::aarch64_neon_sshl:
+    Opcode = AArch64ISD::VSHL;
+    IsRightShift = false;
+    break;
   }
 
   if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
@@ -10132,6 +10136,7 @@
   case Intrinsic::aarch64_neon_sqshlu:
   case Intrinsic::aarch64_neon_srshl:
   case Intrinsic::aarch64_neon_urshl:
+  case Intrinsic::aarch64_neon_sshl:
     return tryCombineShiftImm(IID, N, DAG);
   case Intrinsic::aarch64_crc32b:
   case Intrinsic::aarch64_crc32cb:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1219,6 +1219,65 @@
   ret <2 x i64> %tmp3
 }
 
+declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
+
+define <16 x i8> @neon.sshll16b_constant_shift(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: neon.sshll16b_constant_shift
+;CHECK: shl.16b v0, {{v[0-9]+}}, #1
+  %tmp1 = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: neon.sshll8h_constant_shift
+;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @neon.sshll4s_constant_shift(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: neon.sshll4s_constant_shift
+;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  ret <4 x i32> %tmp3
+}
+
+; FIXME: unnecessary sshll.4s v0, v0, #0?
+define <4 x i32> @neon.sshll4s_neg_constant_shift(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: neon.sshll4s_neg_constant_shift
+;CHECK: movi.2d v1, #0xffffffffffffffff
+;CHECK: sshll.4s v0, v0, #0
+;CHECK: sshl.4s v0, v0, v1
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+  ret <4 x i32> %tmp3
+}
+
+; FIXME: should be constant folded.
+define <4 x i32> @neon.sshll4s_constant_fold() nounwind {
+;CHECK-LABEL: neon.sshll4s_constant_fold
+;CHECK: shl.4s v0, {{v[0-9]+}}, #1
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @neon.sshll2d_constant_shift(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: neon.sshll2d_constant_shift
+;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
+  ret <2 x i64> %tmp3
+}
 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sshll2_8h:
 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1