Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -106,6 +106,12 @@ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const; + Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) const; + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { switch (K) { case TargetTransformInfo::RGK_Scalar: Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1168,6 +1168,32 @@ return None; } +Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts, + APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) const { + switch (II.getIntrinsicID()) { + default: + break; + case Intrinsic::aarch64_neon_fcvtxn: + case Intrinsic::aarch64_neon_rshrn: + case Intrinsic::aarch64_neon_sqrshrn: + case Intrinsic::aarch64_neon_sqrshrun: + case Intrinsic::aarch64_neon_sqshrn: + case Intrinsic::aarch64_neon_sqshrun: + case Intrinsic::aarch64_neon_sqxtn: + case Intrinsic::aarch64_neon_sqxtun: + case Intrinsic::aarch64_neon_uqrshrn: + case Intrinsic::aarch64_neon_uqshrn: + case Intrinsic::aarch64_neon_uqxtn: + SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts); + break; + } + + return None; +} + bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, ArrayRef<const Value *> Args) { Index: llvm/test/Transforms/InstCombine/AArch64/demandelts.ll =================================================================== --- 
llvm/test/Transforms/InstCombine/AArch64/demandelts.ll +++ llvm/test/Transforms/InstCombine/AArch64/demandelts.ll @@ -3,8 +3,7 @@ define <2 x float> @fcvtxn(<2 x double> %d1) { ; CHECK-LABEL: @fcvtxn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[D1:%.*]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[A]]) +; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[D1:%.*]]) ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[I]], <2 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[S]] ; @@ -16,7 +15,7 @@ define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @rshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -29,7 +28,7 @@ define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqrshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -42,7 +41,7 @@ define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqrshrun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -55,7 +54,7 @@ define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -68,7 +67,7 @@ define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqshrun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -81,7 +80,7 @@ define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqxtn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]]) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -94,7 +93,7 @@ define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqxtun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]]) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -107,7 +106,7 @@ define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqrshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -120,7 +119,7 @@ define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[A]], i32 9) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] @@ -133,7 +132,7 @@ define <4 x i16> @uqxtn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqxtn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]]) ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]]