diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14995,6 +14995,36 @@
   return SDValue();
 }
 
+static SDValue pushAndThroughExtractSubvector(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  if (LHS->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      !LHS->hasOneUse())
+    return SDValue();
+  ConstantSDNode *SplatVal = isConstOrConstSplat(RHS);
+  if (!SplatVal)
+    return SDValue();
+  SDValue AnyExt = LHS->getOperand(0);
+  if (AnyExt->getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+  EVT AnyExtVT = AnyExt->getValueType(0);
+  SDValue Extendee = AnyExt->getOperand(0);
+
+  unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
+  if (!SplatVal->getAPIntValue().isMask(ScalarWidth))
+    return SDValue();
+
+  // (and (extract_subvector (anyext v) _) iN_mask)
+  //   => (extract_subvector (iN_zeroext v) _)
+  SDValue ZeroExtExtendee =
+      DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), AnyExtVT, Extendee);
+
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+                     LHS->getOperand(1));
+}
+
 static SDValue performANDCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
@@ -15008,6 +15038,9 @@
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
     return SDValue();
 
+  if (SDValue R = pushAndThroughExtractSubvector(N, DAG))
+    return R;
+
   if (VT.isScalableVector())
     return performSVEAndCombine(N, DCI);
 
diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -503,23 +503,16 @@
 define <8 x double> @uitofp_i16_double(<8 x i16> %a) {
 ; CHECK-LABEL: uitofp_i16_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    and v3.8b, v2.8b, v1.8b
-; CHECK-NEXT:    and v4.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-NEXT:    ushll v4.2d, v4.2s, #0
-; CHECK-NEXT:    and v2.8b, v2.8b, v1.8b
-; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ushll v5.2d, v2.2s, #0
-; CHECK-NEXT:    ucvtf v2.2d, v3.2d
-; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
-; CHECK-NEXT:    ucvtf v0.2d, v4.2d
-; CHECK-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-NEXT:    ucvtf v3.2d, v5.2d
+; CHECK-NEXT:    ushll2 v2.2d, v1.4s, #0
+; CHECK-NEXT:    ushll2 v3.2d, v0.4s, #0
+; CHECK-NEXT:    ushll v4.2d, v1.2s, #0
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ucvtf v1.2d, v3.2d
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    ucvtf v3.2d, v2.2d
+; CHECK-NEXT:    ucvtf v2.2d, v4.2d
 ; CHECK-NEXT:    ret
   %1 = uitofp <8 x i16> %a to <8 x double>
   ret <8 x double> %1
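
Note: a minimal IR sketch of the shape the combine targets (illustrative only; @zext_low_half is a hypothetical function name, not part of the patch). When a wide unsigned extension is split up during type legalization, each half can surface as an any_extend feeding an and with an iN splat mask; the combine folds that back into a zero_extend of the narrow source, so it selects to ushll/ushll2 rather than movi + and, as the updated CHECK lines above show.

; Illustrative sketch only (assumed reproducer, not part of the patch):
; the low half of a widened zext, which legalization may represent as
; (and (extract_subvector (any_extend %v), 0), splat(0xffff)) before
; the new combine rewrites it to
; (extract_subvector (zero_extend %v), 0).
define <2 x i64> @zext_low_half(<8 x i16> %v) {
  %wide = zext <8 x i16> %v to <8 x i64>
  %lo = shufflevector <8 x i64> %wide, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %lo
}

Running llc -mtriple=aarch64 on input of this shape should exercise the new path through performANDCombine if the anyext-plus-mask pattern forms as described.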