diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -205,6 +205,7 @@ bool tryBitfieldInsertOp(SDNode *N); bool tryBitfieldInsertInZeroOp(SDNode *N); bool tryShiftAmountMod(SDNode *N); + bool tryHighFPExt(SDNode *N); bool tryReadRegister(SDNode *N); bool tryWriteRegister(SDNode *N); @@ -1803,6 +1804,35 @@ return true; } +/// Try to select fcvtl2 for a floating-point extend of a high-half subvector +/// extract (optionally behind bitcasts). Returns true if \p N was selected. +bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { + assert(N->getOpcode() == ISD::FP_EXTEND); + + // Only 2 extend type pairs map to fcvtl2: v2f32 -> v2f64 and v4f16 -> v4f32. + SDValue Extract = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT NarrowVT = Extract.getValueType(); + if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && + (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) + return false; + + // Look through any bitcasts to find the underlying subvector extract. + Extract = peekThroughBitcasts(Extract); + if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return false; + + // The extract must start at the first element of the high half. + // Example: v8i16 -> v4i16 means the extract must begin at index 4. + unsigned ExtractIndex = Extract.getConstantOperandVal(1); + if (ExtractIndex != Extract.getValueType().getVectorNumElements()) + return false; + + auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; + CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); + return true; +} + static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits = 0, @@ -3010,6 +3040,11 @@ return; break; + case ISD::FP_EXTEND: + if (tryHighFPExt(Node)) + return; + break; + case ISD::OR: if (tryBitfieldInsertOp(Node)) return; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3646,14 +3646,8 @@ (i64 4)))), (FCVTLv8i16 V128:$Rn)>; def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn), - (i64 2))))), - (FCVTLv4i32 V128:$Rn)>; def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn), - (i64 4))))), - (FCVTLv8i16 V128:$Rn)>; defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -39,21 +39,14 @@ } define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v1f64_f32_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.2d, v0.2s -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v1f64_f32_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: fcvtl2 v0.2d, v0.4s -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: ret ; ; GISEL-LABEL: 
test_vcvt_high_v1f64_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.2d, v0.2s +; GISEL-NEXT: fcvtl2 v0.2d, v0.4s ; GISEL-NEXT: ret %bc1 = bitcast <4 x float> %x to <2 x double> %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> @@ -63,23 +56,14 @@ } define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v1i64_f32_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.2d, v0.2s -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v1i64_f32_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.2d, v0.2s -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.2d, v0.2s +; GISEL-NEXT: fcvtl2 v0.2d, v0.4s ; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> %bc2 = bitcast <1 x i64> %ext to <2 x float> @@ -88,23 +72,14 @@ } define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v2i32_f32_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.2d, v0.2s -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v2i32_f32_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.2d, v0.2s -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.2d, v0.2s +; GISEL-NEXT: 
fcvtl2 v0.2d, v0.4s ; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> %bc2 = bitcast <2 x i32> %ext to <2 x float> @@ -113,23 +88,14 @@ } define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v4i16_f32_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.2d, v0.2s -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v4i16_f32_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.2d, v0.2s -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.2d, v0.2s +; GISEL-NEXT: fcvtl2 v0.2d, v0.4s ; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> %bc2 = bitcast <4 x i16> %ext to <2 x float> @@ -138,23 +104,14 @@ } define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v8i8_f32_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.2d, v0.2s -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v8i8_f32_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.2d, v0.2s -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.2d, v0.2s +; GISEL-NEXT: fcvtl2 v0.2d, v0.4s ; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> %bc2 = bitcast <8 x i8> %ext to <2 x float> @@ -163,23 
+120,14 @@ } define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v1i64_f16_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.4s, v0.4h -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v1i64_f16_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.4s, v0.4h -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.4s, v0.4h +; GISEL-NEXT: fcvtl2 v0.4s, v0.8h ; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> %bc2 = bitcast <1 x i64> %ext to <4 x half> @@ -188,23 +136,14 @@ } define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v2i32_f16_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.4s, v0.4h -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v2i32_f16_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.4s, v0.4h -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.4s, v0.4h +; GISEL-NEXT: fcvtl2 v0.4s, v0.8h ; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> %bc2 = bitcast <2 x i32> %ext to <4 x half> @@ -213,23 +152,14 @@ } define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp { -; GENERIC-LABEL: 
test_vcvt_high_v4i16_f16_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.4s, v0.4h -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v4i16_f16_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.4s, v0.4h -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.4s, v0.4h +; GISEL-NEXT: fcvtl2 v0.4s, v0.8h ; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> %bc2 = bitcast <4 x i16> %ext to <4 x half> @@ -238,23 +168,14 @@ } define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_v8i8_f16_bitcast: -; GENERIC: // %bb.0: -; GENERIC-NEXT: ext.16b v0, v0, v0, #8 -; GENERIC-NEXT: fcvtl v0.4s, v0.4h -; GENERIC-NEXT: ret -; -; FAST-LABEL: test_vcvt_high_v8i8_f16_bitcast: -; FAST: // %bb.0: -; FAST-NEXT: ext.16b v0, v0, v0, #8 -; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0 -; FAST-NEXT: fcvtl v0.4s, v0.4h -; FAST-NEXT: ret +; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: ext.16b v0, v0, v0, #8 -; GISEL-NEXT: fcvtl v0.4s, v0.4h +; GISEL-NEXT: fcvtl2 v0.4s, v0.8h ; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> %bc2 = bitcast <8 x i8> %ext to <4 x half>