Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1486,6 +1486,12 @@
 /// SetCC operation.
 CondCode getSetCCInverse(CondCode Operation, EVT Type);
 
+/// Return true if \p Opcode is ANY_EXTEND, ZERO_EXTEND or SIGN_EXTEND.
+inline bool isExtOpcode(unsigned Opcode) {
+  return Opcode == ISD::ANY_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+         Opcode == ISD::SIGN_EXTEND;
+}
+
 namespace GlobalISel {
 /// Return the operation corresponding to !(X op Y), where 'op' is a valid
 /// SetCC operation. The U bit of the condition code has different meanings
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6356,6 +6356,27 @@
     }
   }
 
+  if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
+      ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+    SDValue Ext = N0.getOperand(0);
+    EVT ExtVT = Ext->getValueType(0);
+    SDValue Extendee = Ext->getOperand(0);
+
+    unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
+    // Once operations have been legalized, only form the ZERO_EXTEND if the
+    // target reports it as legal for the extended vector type.
+    if (N1C->getAPIntValue().isMask(ScalarWidth) &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
+      //    (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
+      // => (extract_subvector (iN_zeroext v))
+      SDValue ZeroExtExtendee =
+          DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
+
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+                         N0.getOperand(1));
+    }
+  }
+
   // fold (and (masked_gather x)) -> (zext_masked_gather x)
   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
     EVT MemVT = GN0->getMemoryVT();
Index: llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind {
+; CHECK-LABEL: and_extract_zext_idx0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %zext = zext <4 x i16> %vec to <4 x i32>
+  %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 0)
+  %and = and <2 x i32> %extract, <i32 65535, i32 65535>
+  ret <2 x i32> %and
+}
+
+define <4 x i16> @and_extract_sext_idx0(<8 x i8> %vec) nounwind {
+; CHECK-LABEL: and_extract_sext_idx0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %zext = sext <8 x i8> %vec to <8 x i16>
+  %extract = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %zext, i64 0)
+  %and = and <4 x i16> %extract, <i16 255, i16 255, i16 255, i16 255>
+  ret <4 x i16> %and
+}
+
+define <2 x i32> @and_extract_zext_idx2(<4 x i16> %vec) nounwind {
+; CHECK-LABEL: and_extract_zext_idx2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %zext = zext <4 x i16> %vec to <4 x i32>
+  %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 2)
+  %and = and <2 x i32> %extract, <i32 65535, i32 65535>
+  ret <2 x i32> %and
+}
+
+define <4 x i16> @and_extract_sext_idx4(<8 x i8> %vec) nounwind {
+; CHECK-LABEL: and_extract_sext_idx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %zext = sext <8 x i8> %vec to <8 x i16>
+  %extract = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %zext, i64 4)
+  %and = and <4 x i16> %extract, <i16 255, i16 255, i16 255, i16 255>
+  ret <4 x i16> %and
+}
+
+declare <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32>, i64)
+declare <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16>, i64)
Index: llvm/test/CodeGen/AArch64/vector-fcvt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -503,23 +503,16 @@
 define <8 x double> @uitofp_i16_double(<8 x i16> %a) {
 ; CHECK-LABEL: uitofp_i16_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    and v3.8b, v2.8b, v1.8b
-; CHECK-NEXT:    and v4.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-NEXT:    ushll v4.2d, v4.2s, #0
-; CHECK-NEXT:    and v2.8b, v2.8b, v1.8b
-; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ushll v5.2d, v2.2s, #0
-; CHECK-NEXT:    ucvtf v2.2d, v3.2d
-; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
-; CHECK-NEXT:    ucvtf v0.2d, v4.2d
-; CHECK-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-NEXT:    ucvtf v3.2d, v5.2d
+; CHECK-NEXT:    ushll2 v2.2d, v1.4s, #0
+; CHECK-NEXT:    ushll2 v3.2d, v0.4s, #0
+; CHECK-NEXT:    ushll v4.2d, v1.2s, #0
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ucvtf v1.2d, v3.2d
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    ucvtf v3.2d, v2.2d
+; CHECK-NEXT:    ucvtf v2.2d, v4.2d
 ; CHECK-NEXT:    ret
   %1 = uitofp <8 x i16> %a to <8 x double>
   ret <8 x double> %1