Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8496,6 +8496,15 @@
                      (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
           (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
 
+// Prefer a full-width faddp Rn, Rn and use its low half (dsub) rather than
+// faddp extractlow(Rn), extracthigh(Rn), which needs an ext for the high half.
+def : Pat<(AArch64faddp (extract_subvector (v4f32 FPR128:$Rn), (i64 0)),
+                        (extract_subvector (v4f32 FPR128:$Rn), (i64 2))),
+          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64faddp (extract_subvector (v8f16 FPR128:$Rn), (i64 0)),
+                        (extract_subvector (v8f16 FPR128:$Rn), (i64 4))),
+          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
+
 // Scalar 64-bit shifts in FPR64 registers.
 def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
           (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
Index: llvm/test/CodeGen/AArch64/faddp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/faddp.ll
+++ llvm/test/CodeGen/AArch64/faddp.ll
@@ -260,8 +260,7 @@
 ; CHECK-LABEL: faddp_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    faddp v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
   %1 = fadd <4 x float> %a, %b
@@ -278,8 +277,8 @@
 ; CHECK-LABEL: faddp_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    faddp v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %1 = fadd <8 x half> %a, %b
   %2 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32>