Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -105,7 +105,7 @@
   class AdvSIMD_1VectorArg_Long_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
   class AdvSIMD_1IntArg_Narrow_Intrinsic
-    : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
+    : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty], [IntrNoMem]>;
   class AdvSIMD_1VectorArg_Narrow_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
   class AdvSIMD_1VectorArg_Int_Across_Intrinsic
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4622,7 +4622,18 @@
   case Intrinsic::aarch64_neon_umin:
     return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
-
+  case Intrinsic::aarch64_neon_scalar_sqxtn:
+  case Intrinsic::aarch64_neon_scalar_sqxtun:
+  case Intrinsic::aarch64_neon_scalar_uqxtn: {
+    assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
+    if (Op.getValueType() == MVT::i32)
+      return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::i32,
+                         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(Op),
+                                     MVT::f32, Op.getOperand(0),
+                                     DAG.getNode(ISD::BITCAST, SDLoc(Op),
+                                                 MVT::f64, Op.getOperand(1))));
+    return SDValue();
+  }
   case Intrinsic::aarch64_sve_sunpkhi:
     return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
                        Op.getOperand(1));
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7207,7 +7207,7 @@
 multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
                                  SDPatternOperator OpNode = null_frag> {
   def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm,
-    [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+    [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn)))]>;
   def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>;
   def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8, FPR16, asm, []>;
 }
Index: llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
+++ llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
@@ -193,8 +193,7 @@
 define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
 ; CHECK-LABEL: uqxtn_ext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, v3.d[1]
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    mov v0.d[0], v3.d[1]
 ; CHECK-NEXT:    uqxtn s0, d0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -209,8 +208,7 @@
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov d1, x0
 ; CHECK-NEXT:    sqxtn s1, d1
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c)
@@ -221,11 +219,9 @@
 define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
 ; CHECK-LABEL: sqxtun_insext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, v1.d[1]
-; CHECK-NEXT:    fmov d1, x8
+; CHECK-NEXT:    mov v1.d[0], v1.d[1]
 ; CHECK-NEXT:    sqxtun s1, d1
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %c = extractelement <2 x i64> %e, i64 1
@@ -239,8 +235,7 @@
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    saddlv d1, v1.4s
 ; CHECK-NEXT:    sqxtn s1, d1
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b)