diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -553,6 +553,14 @@
   def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
             (DUP_ZI_D $a, $b)>;
 
+  // Duplicate immediate FP into all vector elements.
+  def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
+            (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
+  def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
+            (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
+  def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
+            (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
+
   // Duplicate FP immediate into all vector elements
   let AddedComplexity = 2 in {
     def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -130,12 +130,37 @@
   ret %out
 }
 
+define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
+; CHECK-LABEL: dup_fmov_imm_f32_2:
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+  %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
+; CHECK-LABEL: dup_fmov_imm_f32_4:
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+  %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
+; CHECK-LABEL: dup_fmov_imm_f64_2:
+; CHECK: mov x8, #4631107791820423168
+; CHECK-NEXT: mov z0.d, x8
+  %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
+  ret <vscale x 2 x double> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
+declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -372,5 +372,32 @@
   ret %r
 }
 
+define <vscale x 2 x float> @splat_nxv2f32_fmov_fold() {
+; CHECK-LABEL: splat_nxv2f32_fmov_fold
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+  %1 = insertelement <vscale x 2 x float> undef, float 4.200000e+01, i32 0
+  %2 = shufflevector <vscale x 2 x float> %1, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x float> %2
+}
+
+define <vscale x 4 x float> @splat_nxv4f32_fmov_fold() {
+; CHECK-LABEL: splat_nxv4f32_fmov_fold
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+  %1 = insertelement <vscale x 4 x float> undef, float 4.200000e+01, i32 0
+  %2 = shufflevector <vscale x 4 x float> %1, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 2 x double> @splat_nxv2f64_fmov_fold() {
+; CHECK-LABEL: splat_nxv2f64_fmov_fold
+; CHECK: mov x8, #4631107791820423168
+; CHECK-NEXT: mov z0.d, x8
+  %1 = insertelement <vscale x 2 x double> undef, double 4.200000e+01, i32 0
+  %2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x double> %2
+}
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }