diff --git a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll
@@ -60,4 +60,19 @@
   ret void
 }
 
+; Operand %4 is the higher half of a v2i64, and operand %2 is an i64 input parameter.
+; Test that %2 is duplicated directly into the proper SIMD lane for optimal codegen.
+define void @test3(ptr %0, <2 x i64> %1, i64 %2) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v1.2d, x1
+; CHECK-NEXT:    pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %4 = extractelement <2 x i64> %1, i64 1
+  %5 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %4, i64 %2)
+  store <16 x i8> %5, ptr %0, align 16
+  ret void
+}
+
 declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
diff --git a/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll b/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll
--- a/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll
+++ b/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll
@@ -62,19 +62,4 @@
   ret void
 }
 
-; Operand %4 is the higher-half of v2i64, and operand %2 is an input parameter of i64.
-; Test that %2 is duplicated into the proper lane of SIMD directly for optimal codegen.
-define void @test4(ptr %0, <2 x i64> %1, i64 %2) {
-; CHECK-LABEL: test4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    dup v1.2d, x1
-; CHECK-NEXT:    pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
-  %4 = extractelement <2 x i64> %1, i64 1
-  %5 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %4, i64 %2)
-  store <16 x i8> %5, ptr %0, align 16
-  ret void
-}
-
 declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)