Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -6157,6 +6157,86 @@ def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_pd_128 : + GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_pd_128 : + GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_pd_256 : + GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_pd_256 : + GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_pd_512 : + GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_pd_512 : + GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_ps_128 : + GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_ps_128 : + GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_ps_256 : + GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_ps_256 : + GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_ps_512 : + GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_ps_512 : + GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_sd : + GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_sd : + GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fixupimm_ss : + GCCBuiltin<"__builtin_ia32_fixupimmss_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_fixupimm_ss : + GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -402,6 +402,7 @@ VPTERNLOG, // Fix Up Special Packed Float32/64 values VFIXUPIMM, + VFIXUPIMMS, // Range Restriction Calculation For Packed Pairs of Float32/64 values VRANGE, // Reduce - Perform Reduction Transformation on scalar\packed FP Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -17024,6 +17024,35 @@ Src2, Src1); return DAG.getBitcast(VT, Res); } + case FIXUPIMMS: + case FIXUPIMMS_MASKZ: + case FIXUPIMM: + case FIXUPIMM_MASKZ:{ + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Imm = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS ) ? + Src1 : getZeroVector(VT, Subtarget, DAG, dl); + // We specify 2 possible modes for intrinsics, with/without rounding + // modes. + // First, we check if the intrinsic have rounding mode (7 operands), + // if not, we set rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 7) + Rnd = Op.getOperand(6); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ) + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3, Imm, Rnd), + Mask, Passthru, Subtarget, DAG); + else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3, Imm, Rnd), + Mask, Passthru, Subtarget, DAG); + } case CONVERT_TO_MASK: { MVT SrcVT = Op.getOperand(1).getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); @@ -20934,6 +20963,7 @@ case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; + case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS"; case X86ISD::VRANGE: return "X86ISD::VRANGE"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -95,6 +95,12 @@ "v" # NumElts # "f" # EltSize, VTName))); + ValueType IntVT = !cast( + !if (!eq (!srl(EltSize,5),0), + VTName, + !if (!eq(TypeVariantName, "f"), + "v" # NumElts # "i" # EltSize, + VTName))); // The string to specify embedded broadcast in assembly. string BroadcastStr = "{1to" # NumElts # "}"; @@ -301,7 +307,7 @@ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, - (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>; + (X86select _.KRCWM:$mask, RHS, _.RC:$src1), X86select>; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -6913,19 +6919,6 @@ opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W; } -defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", - avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", - avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; - -defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, - 0x55, X86VFixupimm, HasAVX512>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, - 0x55, X86VFixupimm, HasAVX512>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>, AVX512AIi8Base, EVEX; @@ -7458,3 +7451,112 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - FixupImm +//===----------------------------------------------------------------------===// + +multiclass avx512_fixupimm_packed opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + let Constraints = "$src1 = $dst" in { + defm rri : AVX512_maskable_3src; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_3src; + defm rmbi : AVX512_maskable_3src, EVEX_B; + } + } // Constraints = "$src1 = $dst" +} + +multiclass avx512_fixupimm_packed_sae opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ +let Constraints = "$src1 = $dst" in { + defm rrib : AVX512_maskable_3src, EVEX_B; + } +} + +multiclass avx512_fixupimm_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo _src3VT> { + let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in { + defm rri : AVX512_maskable_3src_scalar; + + defm rrib : AVX512_maskable_3src_scalar, EVEX_B; + let mayLoad = 1 in + defm rmi : AVX512_maskable_3src_scalar; + } +} + +multiclass avx512_fixupimm_packed_all{ + let Predicates = [HasAVX512] in + defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>, + avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>, + AVX512AIi8Base, EVEX_4V, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>, + AVX512AIi8Base, EVEX_4V, EVEX_V128; + defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>, + AVX512AIi8Base, EVEX_4V, EVEX_V256; + } +} + +defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, + f32x_info, v4i32x_info>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, + f64x_info, v2i64x_info>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMPS : avx512_fixupimm_packed_all, + EVEX_CD8<32, CD8VF>; +defm VFIXUPIMMPD : avx512_fixupimm_packed_all, + EVEX_CD8<64, CD8VF>, VEX_W; Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -309,6 +309,10 @@ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>; def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; +def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisVec<3>, SDTCisInt<4>, SDTCisInt<5>]>; +def SDTFPTernaryOpImmRounds: SDTypeProfile<1, 5, [SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>,SDTCisInt<3>, SDTCisInt<4>, SDTCisInt<5>]>; def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; @@ -405,7 +409,8 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; -def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; +def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>; +def X86VFixupimmScalar : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRounds>; def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>; def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,8 +30,8 @@ COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC, - TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, - CONVERT_MASK_TO_VEC, CONVERT_TO_MASK + TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, + FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; struct IntrinsicData { @@ -810,6 +810,14 @@ X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0), + X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0), @@ -1842,6 +1850,22 @@ X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_512, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_128, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_256, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_512, FIXUPIMM_MASKZ, + X86ISD::VFIXUPIMM, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMMS_MASKZ, + X86ISD::VFIXUPIMMS, 0), + X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ, + X86ISD::VFIXUPIMMS, 0), X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ, Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -7000,3 +7000,179 @@ %res4 = add <16 x i32> %res3, %res2 ret <16 x i32> %res4 } + +declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1} +; CHECK: vpxord %zmm4, %zmm4, %zmm4 +; CHECK: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z} +; CHECK: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0 +; CHECK: vaddpd %zmm4, %zmm3, %zmm1 +; CHECK: vaddpd %zmm0, %zmm1, %zmm0 + + %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) + %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) + +define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK: vpxord %zmm4, %zmm4, %zmm4 +; CHECK: vmovaps %zmm0, %zmm5 +; CHECK: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z} +; CHECK: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0 +; CHECK: vaddpd %zmm5, %zmm3, %zmm1 +; CHECK: vaddpd %zmm0, %zmm1, %zmm0 + + %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) + %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK: vpxor %xmm4, %xmm4, %xmm4 +; CHECK: vmovaps %zmm0, %zmm5 +; CHECK: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1} +; CHECK: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 +; CHECK: vaddps %xmm5, %xmm3, %xmm1 +; CHECK: vaddps %xmm0, %xmm1, %xmm0 + + %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) + %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) + +define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm4 +; CHECK: vpxor %xmm2, %xmm2, %xmm2 +; CHECK: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK: vaddps %xmm0, %xmm3, %xmm0 +; CHECK: vaddps %xmm4, %xmm0, %xmm0 + + %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} +; CHECK: vpxord %zmm4, %zmm4, %zmm4 +; CHECK: vmovaps %zmm0, %zmm5 +; CHECK: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1} +; CHECK: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 +; CHECK: vaddps %zmm5, %zmm3, %zmm1 +; CHECK: vaddps %zmm0, %zmm1, %zmm0 + + %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) + %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) + +define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm4 +; CHECK: vpxord %zmm2, %zmm2, %zmm2 +; CHECK: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK: vaddps %zmm0, %zmm3, %zmm0 +; CHECK: vaddps %zmm4, %zmm0, %zmm0 + + %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8) + %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 +; CHECK: vpxor %xmm2, %xmm2, %xmm2 +; CHECK: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} +; CHECK: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK: vaddpd %xmm4, %xmm0, %xmm0 + + %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) + +define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK: vpxor %xmm4, %xmm4, %xmm4 +; CHECK: vmovaps %zmm0, %zmm5 +; CHECK: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z} +; CHECK: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK: vaddpd %xmm5, %xmm3, %xmm1 +; CHECK: vaddpd %xmm0, %xmm1, %xmm0 + + %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -7690,3 +7690,173 @@ call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) ret void } + +declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) + +define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL:test_int_x86_avx512_mask_fixupimm_pd_128 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK: vpxor %xmm4, %xmm4, %xmm4 +; CHECK: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} +; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 +; CHECK: vaddpd %xmm4, %xmm3, %xmm1 +; CHECK: vaddpd %xmm0, %xmm1, %xmm0 + + %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4) + %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) + +define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK: vpxor %xmm2, %xmm2, %xmm2 +; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK: vaddpd %xmm0, %xmm3, %xmm0 + %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4) + %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4) + ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + ;%res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res3 +} + +declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8) + +define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} +; CHECK: vpxor %ymm4, %ymm4, %ymm4 +; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} +; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 +; CHECK: vaddpd %ymm4, %ymm3, %ymm1 +; CHECK: vaddpd %ymm0, %ymm1, %ymm0 + + %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4) + %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4) + %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res3, %res2 + ret <4 x double> %res4 +} + +declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8) + +define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK: vpxor %ymm4, %ymm4, %ymm4 +; CHECK: vmovaps %zmm0, %zmm5 +; CHECK: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} +; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 +; CHECK: vaddpd %ymm5, %ymm3, %ymm1 +; CHECK: vaddpd %ymm0, %ymm1, %ymm0 + + %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4) + %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4) + %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res3, %res2 + ret <4 x double> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8) + +define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4 +; CHECK: vpxor %xmm2, %xmm2, %xmm2 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} +; CHECK: vaddps %xmm0, %xmm3, %xmm0 +; CHECK: vaddps %xmm4, %xmm0, %xmm0 + + %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) + %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8) + +define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4 +; CHECK: vpxor %xmm2, %xmm2, %xmm2 +; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK: vaddps %xmm0, %xmm3, %xmm0 +; CHECK: vaddps %xmm4, %xmm0, %xmm0 + + %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) + %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) + %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8) + +define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4 +; CHECK: vpxor %ymm2, %ymm2, %ymm2 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} +; CHECK: vaddps %ymm0, %ymm3, %ymm0 +; CHECK: vaddps %ymm4, %ymm0, %ymm0 + + %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) + %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8) + +define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256 +; CHECK: kmovw %edi, %k1 +; CHECK: vmovaps %zmm0, %zmm3 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK: vmovaps %zmm0, %zmm4 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4 +; CHECK: vpxor %ymm2, %ymm2, %ymm2 +; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK: vaddps %ymm0, %ymm3, %ymm0 +; CHECK: vaddps %ymm4, %ymm0, %ymm0 + + %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) + %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) + %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +}