diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18616,6 +18616,12 @@
   } else
     return SDValue();
 
+  // If the shift amount is zero, remove the shift intrinsic; sqshlu is kept
+  // because it saturates a signed input to an unsigned result even when the
+  // shift amount is zero, so it is not a no-op there.
+  if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
+    return N->getOperand(1);
+
   unsigned Opcode;
   bool IsRightShift;
   switch (IID) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -3435,3 +3435,97 @@
   %c = ashr <1 x i64> %a, %b
   ret <1 x i64> %c
 }
+
+define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: sqshl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: uqshl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: srshl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: urshl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: sqshlu_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    sqshlu.2d v0, v0, #0
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: sshl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
+; CHECK-LABEL: ushl_zero_shift_amount:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp.2d v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+  ret void
+}
+
+declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
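
Note: the %vpaddq_v2.i.i / %vshlq_v2.i.i names suggest the test IR came from NEON intrinsic code; a minimal C sketch of that shape follows (function and variable names are illustrative, assuming clang targeting aarch64 with arm_neon.h):

#include <arm_neon.h>

// Pairwise add, then a signed saturating shift left by zero. With the fold
// above, the sqshl intrinsic is dropped during ISel and only the addp and
// the store remain, as the CHECK lines in sqshl_zero_shift_amount expect.
void pairwise_add_then_shift0(int64x2_t a, int64x2_t b, int64x2_t *dst) {
  int64x2_t sum = vpaddq_s64(a, b);        // lowers to llvm.aarch64.neon.addp.v2i64
  *dst = vqshlq_s64(sum, vdupq_n_s64(0));  // lowers to llvm.aarch64.neon.sqshl.v2i64
}

The sqshlu case has no such identity: even with a shift amount of 0, the saturating shift-left-unsigned clamps negative lanes to zero, which is why the sqshlu test still expects a sqshlu.2d with #0.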