Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -1715,83 +1715,6 @@ def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; - - def int_x86_avx512_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty], [IntrNoMem]>; - def int_x86_avx512_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty], [IntrNoMem]>; - - def int_x86_avx512_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128">, - Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256">, - Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512">, - Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128">, - Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256">, - Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512">, - Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; - - - def int_x86_avx512_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty], [IntrNoMem]>; - def int_x86_avx512_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty], [IntrNoMem]>; - def int_x86_avx512_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_i32_ty], 
[IntrNoMem]>; - def int_x86_avx512_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; - } // Gather ops @@ -2163,32 +2086,6 @@ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - - def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem]>; - def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; - def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; - def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_xop_vpshab : GCCBuiltin<"__builtin_ia32_vpshab">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], Index: llvm/trunk/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/trunk/lib/IR/AutoUpgrade.cpp +++ llvm/trunk/lib/IR/AutoUpgrade.cpp @@ -284,10 +284,6 @@ Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 
128/256 in 5.0 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 - Name.startswith("avx512.mask.prorv.") || // Added in 7.0 - Name.startswith("avx512.mask.pror.") || // Added in 7.0 - Name.startswith("avx512.mask.prolv.") || // Added in 7.0 - Name.startswith("avx512.mask.prol.") || // Added in 7.0 Name.startswith("avx512.mask.padds.") || // Added in 8.0 Name.startswith("avx512.mask.psubs.") || // Added in 8.0 Name == "sse.cvtsi2ss" || // Added in 7.0 @@ -354,6 +350,13 @@ Name.startswith("avx512.cvtmask2") || // Added in 5.0 (Name.startswith("xop.vpcom") && // Added in 3.2 F->arg_size() == 2) || + Name.startswith("xop.vprot") || // Added in 8.0 + Name.startswith("avx512.prol") || // Added in 8.0 + Name.startswith("avx512.pror") || // Added in 8.0 + Name.startswith("avx512.mask.prorv.") || // Added in 8.0 + Name.startswith("avx512.mask.pror.") || // Added in 8.0 + Name.startswith("avx512.mask.prolv.") || // Added in 8.0 + Name.startswith("avx512.mask.prol.") || // Added in 8.0 Name.startswith("avx512.ptestm") || //Added in 6.0 Name.startswith("avx512.ptestnm") || //Added in 6.0 Name.startswith("sse2.pavg") || // Added in 6.0 @@ -942,6 +945,33 @@ return Res; } +static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, + bool IsRotateRight) { + Type *Ty = CI.getType(); + Value *Src = CI.getArgOperand(0); + Value *Amt = CI.getArgOperand(1); + + // Amount may be scalar immediate, in which case create a splat vector. + // Funnel shifts amounts are treated as modulo and types are all power-of-2 so + // we only care about the lowest log2 bits anyway. + if (Amt->getType() != Ty) { + unsigned NumElts = Ty->getVectorNumElements(); + Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); + Amt = Builder.CreateVectorSplat(NumElts, Amt); + } + + Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; + Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); + Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt}); + + if (CI.getNumArgOperands() == 4) { // For masked intrinsics. 
+ Value *VecSrc = CI.getOperand(2); + Value *Mask = CI.getOperand(3); + Res = EmitX86Select(Builder, Mask, Res, VecSrc); + } + return Res; +} + static Value *UpgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned) { @@ -1350,66 +1380,6 @@ IID = Intrinsic::x86_avx512_vpshrd_w_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("prorv.")) { - if (VecWidth == 128 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prorv_d_128; - else if (VecWidth == 256 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prorv_d_256; - else if (VecWidth == 512 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prorv_d_512; - else if (VecWidth == 128 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prorv_q_128; - else if (VecWidth == 256 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prorv_q_256; - else if (VecWidth == 512 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prorv_q_512; - else - llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("prolv.")) { - if (VecWidth == 128 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prolv_d_128; - else if (VecWidth == 256 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prolv_d_256; - else if (VecWidth == 512 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prolv_d_512; - else if (VecWidth == 128 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prolv_q_128; - else if (VecWidth == 256 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prolv_q_256; - else if (VecWidth == 512 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prolv_q_512; - else - llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pror.")) { - if (VecWidth == 128 && EltWidth == 32) - IID = Intrinsic::x86_avx512_pror_d_128; - else if (VecWidth == 256 && EltWidth == 32) - IID = Intrinsic::x86_avx512_pror_d_256; - else if (VecWidth == 512 && EltWidth == 32) - IID = Intrinsic::x86_avx512_pror_d_512; - else if (VecWidth == 128 && EltWidth == 64) - IID = Intrinsic::x86_avx512_pror_q_128; - else if (VecWidth == 256 && EltWidth == 64) - IID = Intrinsic::x86_avx512_pror_q_256; - else if (VecWidth == 512 && EltWidth == 64) - IID = Intrinsic::x86_avx512_pror_q_512; - else - llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("prol.")) { - if (VecWidth == 128 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prol_d_128; - else if (VecWidth == 256 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prol_d_256; - else if (VecWidth == 512 && EltWidth == 32) - IID = Intrinsic::x86_avx512_prol_d_512; - else if (VecWidth == 128 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prol_q_128; - else if (VecWidth == 256 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prol_q_256; - else if (VecWidth == 512 && EltWidth == 64) - IID = Intrinsic::x86_avx512_prol_q_512; - else - llvm_unreachable("Unexpected intrinsic"); } else if (Name.startswith("padds.")) { if (VecWidth == 128 && EltWidth == 8) IID = Intrinsic::x86_sse2_padds_b; @@ -2005,6 +1975,13 @@ Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); Rep = Builder.CreateOr(Sel0, Sel1); + } else if (IsX86 && (Name.startswith("xop.vprot") || + Name.startswith("avx512.prol") || + Name.startswith("avx512.mask.prol"))) { + Rep = upgradeX86Rotate(Builder, *CI, false); + } else if (IsX86 && (Name.startswith("avx512.pror") || + Name.startswith("avx512.mask.pror"))) { + Rep = upgradeX86Rotate(Builder, *CI, true); } else if (IsX86 && Name == "sse42.crc32.64.8") { Function *CRC32 = 
Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse42_crc32_32_8); Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -939,30 +939,6 @@ X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0), - X86_INTRINSIC_DATA(avx512_prol_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prol_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prol_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prol_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prol_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prol_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(avx512_prolv_d_128, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_prolv_d_256, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_prolv_d_512, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_prolv_q_128, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_prolv_q_256, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_prolv_q_512, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(avx512_pror_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_pror_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_pror_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_pror_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_pror_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_pror_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0), - X86_INTRINSIC_DATA(avx512_prorv_d_128, INTR_TYPE_2OP, ISD::ROTR, 0), - X86_INTRINSIC_DATA(avx512_prorv_d_256, INTR_TYPE_2OP, ISD::ROTR, 0), - X86_INTRINSIC_DATA(avx512_prorv_d_512, INTR_TYPE_2OP, ISD::ROTR, 0), - X86_INTRINSIC_DATA(avx512_prorv_q_128, INTR_TYPE_2OP, ISD::ROTR, 0), - X86_INTRINSIC_DATA(avx512_prorv_q_256, INTR_TYPE_2OP, ISD::ROTR, 0), - X86_INTRINSIC_DATA(avx512_prorv_q_512, INTR_TYPE_2OP, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0), @@ -1262,14 +1238,6 @@ X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0), X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0), X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0), - X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VROTLI, 0), - X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, ISD::ROTL, 0), - X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VROTLI, 0), X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0), X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0), X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0), Index: 
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -9350,13 +9350,11 @@ ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast <16 x i32> %1 to <8 x i64> ret <8 x i64> %2 } -declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32) #1 - define <8 x i64> @test_mm512_mask_rol_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_rol_epi32: ; X86: # %bb.0: # %entry @@ -9371,7 +9369,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast <8 x i64> %__W to <16 x i32> %3 = bitcast i16 %__U to <16 x i1> %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 @@ -9393,7 +9391,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast i16 %__U to <16 x i1> %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer %4 = bitcast <16 x i32> %3 to <8 x i64> @@ -9406,12 +9404,10 @@ ; CHECK-NEXT: vprolq $5, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %__A, i32 5) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) ret <8 x i64> %0 } -declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32) #1 - define <8 x i64> @test_mm512_mask_rol_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_rol_epi64: ; X86: # %bb.0: # %entry @@ -9426,7 +9422,7 @@ ; X64-NEXT: vprolq $5, %zmm1, %zmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %__A, i32 5) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W ret <8 x i64> %2 @@ -9446,7 +9442,7 @@ ; X64-NEXT: vprolq $5, %zmm0, %zmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %__A, i32 5) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer ret <8 x i64> %2 @@ -9460,7 +9456,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast <16 x i32> %2 to <8 x i64> ret <8 x i64> %3 } @@ -9480,7 +9476,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast <8 x i64> %__W to <16 x i32> %4 = bitcast i16 %__U to <16 x i1> 
%5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 @@ -9503,7 +9499,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast i16 %__U to <16 x i1> %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer %5 = bitcast <16 x i32> %4 to <8 x i64> @@ -9516,7 +9512,7 @@ ; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) ret <8 x i64> %0 } @@ -9534,7 +9530,7 @@ ; X64-NEXT: vprolvq %zmm2, %zmm1, %zmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W ret <8 x i64> %2 @@ -9554,7 +9550,7 @@ ; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer ret <8 x i64> %2 @@ -9567,12 +9563,11 @@ ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast <16 x i32> %1 to <8 x i64> ret <8 x i64> %2 } -declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32) #1 define <8 x i64> @test_mm512_mask_ror_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_ror_epi32: @@ -9588,7 +9583,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast <8 x i64> %__W to <16 x i32> %3 = bitcast i16 %__U to <16 x i1> %4 = select <16 x i1> %3, <16 x i32> %1, <16 x i32> %2 @@ -9610,7 +9605,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <8 x i64> %__A to <16 x i32> - %1 = tail call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %0, i32 5) + %1 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> ) %2 = bitcast i16 %__U to <16 x i1> %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer %4 = bitcast <16 x i32> %3 to <8 x i64> @@ -9623,12 +9618,10 @@ ; CHECK-NEXT: vprorq $5, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %__A, i32 5) + %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) ret <8 x i64> %0 } -declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32) #1 - define <8 x i64> @test_mm512_mask_ror_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_ror_epi64: ; X86: # %bb.0: # %entry @@ -9643,7 +9636,7 @@ ; X64-NEXT: vprorq $5, %zmm1, %zmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %__A, i32 5) + %0 = tail 
call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W ret <8 x i64> %2 @@ -9663,7 +9656,7 @@ ; X64-NEXT: vprorq $5, %zmm0, %zmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %__A, i32 5) + %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer ret <8 x i64> %2 @@ -9677,7 +9670,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast <16 x i32> %2 to <8 x i64> ret <8 x i64> %3 } @@ -9697,7 +9690,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast <8 x i64> %__W to <16 x i32> %4 = bitcast i16 %__U to <16 x i1> %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3 @@ -9720,7 +9713,7 @@ entry: %0 = bitcast <8 x i64> %__A to <16 x i32> %1 = bitcast <8 x i64> %__B to <16 x i32> - %2 = tail call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %0, <16 x i32> %1) + %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %0, <16 x i32> %0, <16 x i32> %1) %3 = bitcast i16 %__U to <16 x i1> %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer %5 = bitcast <16 x i32> %4 to <8 x i64> @@ -9733,7 +9726,7 @@ ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) ret <8 x i64> %0 } @@ -9751,7 +9744,7 @@ ; X64-NEXT: vprorvq %zmm2, %zmm1, %zmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__W ret <8 x i64> %2 @@ -9771,7 +9764,7 @@ ; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %__A, <8 x i64> %__B) + %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__A, <8 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer ret <8 x i64> %2 @@ -9799,10 +9792,11 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) -declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>) -declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>) -declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>) -declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>) + +declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) +declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) +declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) +declare <8 x i64> 
@llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) !0 = !{i32 1} Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3457,6 +3457,282 @@ ret <8 x i64> %res4 } +declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>) + +define <16 x i32>@test_int_x86_avx512_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_d_512: +; X86: ## %bb.0: +; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1] +; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1] +; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] +; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_d_512: +; X64: ## %bb.0: +; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1] +; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1] +; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] +; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + %4 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %5 = bitcast i16 %x3 to <16 x i1> + %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer + %7 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %res3 = add <16 x i32> %3, %6 + %res4 = add <16 x i32> %res3, %7 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>) + +define <8 x i64>@test_int_x86_avx512_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_q_512: +; X86: ## %bb.0: +; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1] +; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1] +; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] +; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_q_512: +; X64: ## %bb.0: +; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1] +; X64-NEXT: vprolvq %zmm1, 
%zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1] +; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] +; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + %4 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + %7 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %res3 = add <8 x i64> %3, %6 + %res4 = add <8 x i64> %res3, %7 + ret <8 x i64> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>) + +define <16 x i32>@test_int_x86_avx512_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_d_512: +; X86: ## %bb.0: +; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1] +; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1] +; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] +; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_d_512: +; X64: ## %bb.0: +; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1] +; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1] +; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] +; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + %4 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %5 = bitcast i16 %x3 to <16 x i1> + %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer + %7 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) + %res3 = add <16 x i32> %3, %6 + %res4 = add <16 x i32> %res3, %7 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>) + +define <8 x i64>@test_int_x86_avx512_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_q_512: +; X86: ## %bb.0: +; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1] +; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1] +; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] +; X86-NEXT: vpaddq %zmm0, 
%zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_q_512: +; X64: ## %bb.0: +; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1] +; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1] +; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] +; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + %4 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + %7 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) + %res3 = add <8 x i64> %3, %6 + %res4 = add <8 x i64> %res3, %7 + ret <8 x i64> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32) + +define <16 x i32>@test_int_x86_avx512_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_d_512: +; X86: ## %bb.0: +; X86-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03] +; X86-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03] +; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_d_512: +; X64: ## %bb.0: +; X64-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03] +; X64-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03] +; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + %4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) + %5 = bitcast i16 %x3 to <16 x i1> + %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer + %7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) + %res3 = add <16 x i32> %3, %6 + %res4 = add <16 x i32> %res3, %7 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32) + +define <8 x i64>@test_int_x86_avx512_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_q_512: +; X86: ## %bb.0: +; X86-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: 
[0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03] +; X86-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03] +; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2] +; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_q_512: +; X64: ## %bb.0: +; X64-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03] +; X64-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03] +; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + %4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + %7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) + %res3 = add <8 x i64> %3, %6 + %res4 = add <8 x i64> %res3, %7 + ret <8 x i64> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32) + +define <16 x i32>@test_int_x86_avx512_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_d_512: +; X86: ## %bb.0: +; X86-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03] +; X86-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03] +; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_d_512: +; X64: ## %bb.0: +; X64-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03] +; X64-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03] +; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + %4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) + %5 = bitcast i16 %x3 to <16 x i1> + %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer + %7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) + %res3 = add <16 x i32> %3, %6 + %res4 = add <16 x i32> %res3, %7 + ret <16 x i32> %res4 +} + +declare <8 x 
i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32) + +define <8 x i64>@test_int_x86_avx512_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_q_512: +; X86: ## %bb.0: +; X86-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03] +; X86-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03] +; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2] +; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_q_512: +; X64: ## %bb.0: +; X64-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03] +; X64-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03] +; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + %4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + %7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) + %res3 = add <8 x i64> %3, %6 + %res4 = add <8 x i64> %res3, %7 + ret <8 x i64> %res4 +} + declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8) define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { @@ -4380,14 +4656,14 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] ; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A] -; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_0, kind: FK_Data_4 +; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_0, kind: FK_Data_4 ; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] ; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A] -; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_1, kind: FK_Data_4 +; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_1, kind: FK_Data_4 ; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9] ; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] ; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A] -; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_2, kind: FK_Data_4 +; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_2, kind: FK_Data_4 ; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] ; @@ -4396,14 +4672,14 @@ ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] ; X64-NEXT: ## encoding: 
[0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A] -; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_0-4, kind: reloc_riprel_4byte +; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_0-4, kind: reloc_riprel_4byte ; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] ; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A] -; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_1-4, kind: reloc_riprel_4byte +; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_1-4, kind: reloc_riprel_4byte ; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9] ; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] ; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A] -; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_2-4, kind: reloc_riprel_4byte +; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_2-4, kind: reloc_riprel_4byte ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1] ; X64-NEXT: retq ## encoding: [0xc3] %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> %x2, i16 %x3) Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -3849,198 +3849,6 @@ declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) -declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>) - -define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 -; CHECK-NEXT: retq - %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %2 = bitcast i16 %x3 to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 - %4 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %5 = bitcast i16 %x3 to <16 x i1> - %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %7 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %res3 = add <16 x i32> %3, %6 - %res4 = add <16 x i32> %res3, %7 - ret <16 x i32> %res4 -} - -declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>) - -define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 -; CHECK-NEXT: retq - %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 - %4 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer - %7 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) - 
%res3 = add <8 x i64> %3, %6 - %res4 = add <8 x i64> %res3, %7 - ret <8 x i64> %res4 -} - -declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>) - -define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 -; CHECK-NEXT: retq - %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %2 = bitcast i16 %x3 to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 - %4 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %5 = bitcast i16 %x3 to <16 x i1> - %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %7 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) - %res3 = add <16 x i32> %3, %6 - %res4 = add <16 x i32> %res3, %7 - ret <16 x i32> %res4 -} - -declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>) - -define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 -; CHECK-NEXT: retq - %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 - %4 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer - %7 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) - %res3 = add <8 x i64> %3, %6 - %res4 = add <8 x i64> %res3, %7 - ret <8 x i64> %res4 -} - -declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32) - -define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprold $3, %zmm0, %zmm2 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) - %2 = bitcast i16 %x3 to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 - %4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) - %5 = bitcast i16 %x3 to <16 x i1> - %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) - %res3 = add <16 x i32> %3, %6 - %res4 = add <16 x i32> %res3, %7 - ret <16 x i32> %res4 -} - -declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32) - -define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 
-; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 - %4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer - %7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) - %res3 = add <8 x i64> %3, %6 - %res4 = add <8 x i64> %res3, %7 - ret <8 x i64> %res4 -} - -declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32) - -define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprord $3, %zmm0, %zmm2 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vprord $3, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) - %2 = bitcast i16 %x3 to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 - %4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) - %5 = bitcast i16 %x3 to <16 x i1> - %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) - %res3 = add <16 x i32> %3, %6 - %res4 = add <16 x i32> %res3, %7 - ret <16 x i32> %res4 -} - -declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32) - -define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vprorq $3, %zmm0, %zmm2 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 - %4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer - %7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) - %res3 = add <8 x i64> %3, %6 - %res4 = add <8 x i64> %res3, %7 - ret <8 x i64> %res4 -} - declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -6626,13 +6626,11 @@ ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) %2 = bitcast <4 x i32> %1 to <2 x i64> ret <2 x i64> %2 } 
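; Illustrative sketch, not part of the committed test: the auto-upgrader rewrites
; the removed rotate intrinsics in terms of the generic funnel-shift intrinsics,
; splatting a scalar immediate amount and, for the masked forms, selecting the
; result against the passthru operand. For example, a left rotate by 5 such as
;   %r = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x, i32 5)
; becomes
;   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
;                                        <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
; while the pror/prorv variants map to @llvm.fshr.* and the XOP vprot* intrinsics
; map to @llvm.fshl.* in the same way.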
-declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32)
-
 define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_rol_epi32:
 ; X86:       # %bb.0: # %entry
@@ -6648,7 +6646,7 @@
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__A to <4 x i32>
-  %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5)
+  %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
   %2 = bitcast <2 x i64> %__W to <4 x i32>
   %3 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -6672,7 +6670,7 @@
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__A to <4 x i32>
-  %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5)
+  %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
   %2 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
@@ -6687,13 +6685,11 @@
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
-  %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+  %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
   %2 = bitcast <8 x i32> %1 to <4 x i64>
   ret <4 x i64> %2
 }

-declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32)
-
 define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_rol_epi32:
 ; X86:       # %bb.0: # %entry
@@ -6709,7 +6705,7 @@
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
-  %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+  %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
   %2 = bitcast <4 x i64> %__W to <8 x i32>
   %3 = bitcast i8 %__U to <8 x i1>
   %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
@@ -6732,7 +6728,7 @@
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
-  %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+  %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
   %2 = bitcast i8 %__U to <8 x i1>
   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
   %4 = bitcast <8 x i32> %3 to <4 x i64>
@@ -6745,12 +6741,10 @@
 ; CHECK-NEXT:    vprolq $5, %xmm0, %xmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
   ret <2 x i64> %0
 }

-declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32)
-
 define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_rol_epi64:
 ; X86:       # %bb.0: # %entry
@@ -6765,7 +6759,7 @@
 ; X64-NEXT:    vprolq $5, %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W
@@ -6786,7 +6780,7 @@
 ; X64-NEXT:    vprolq $5, %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
@@ -6799,12 +6793,10 @@
 ; CHECK-NEXT:    vprolq $5, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
   ret <4 x i64> %0
 }

-declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32)
-
 define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_rol_epi64:
 ; X86:       # %bb.0: # %entry
@@ -6819,7 +6811,7 @@
 ; X64-NEXT:    vprolq $5, %ymm1, %ymm0 {%k1}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W
@@ -6840,7 +6832,7 @@
 ; X64-NEXT:    vprolq $5, %ymm0, %ymm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
@@ -6855,7 +6847,7 @@
 entry:
   %0 = bitcast <2 x i64> %__A to <4 x i32>
   %1 = bitcast <2 x i64> %__B to <4 x i32>
-  %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
   %3 = bitcast <4 x i32> %2 to <2 x i64>
   ret <2 x i64> %3
 }
@@ -6876,7 +6868,7 @@
 entry:
   %0 = bitcast <2 x i64> %__A to <4 x i32>
   %1 = bitcast <2 x i64> %__B to <4 x i32>
-  %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
   %3 = bitcast <2 x i64> %__W to <4 x i32>
   %4 = bitcast i8 %__U to <8 x i1>
   %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -6901,7 +6893,7 @@
 entry:
   %0 = bitcast <2 x i64> %__A to <4 x i32>
   %1 = bitcast <2 x i64> %__B to <4 x i32>
-  %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
   %3 = bitcast i8 %__U to <8 x i1>
   %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer
@@ -6917,7 +6909,7 @@
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
   %1 = bitcast <4 x i64> %__B to <8 x i32>
-  %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1)
+  %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
   %3 = bitcast <8 x i32> %2 to <4 x i64>
   ret <4 x i64> %3
 }
@@ -6938,7 +6930,7 @@
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
   %1 = bitcast <4 x i64> %__B to <8 x i32>
-  %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1)
+  %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
   %3 = bitcast <4 x i64> %__W to <8 x i32>
   %4 = bitcast i8 %__U to <8 x i1>
   %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
@@ -6962,7 +6954,7 @@
 entry:
   %0 = bitcast <4 x i64> %__A to <8 x i32>
   %1 = bitcast <4 x i64> %__B to <8 x i32>
-  %2 = tail call <8 x i32>
@llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer %5 = bitcast <8 x i32> %4 to <4 x i64> @@ -6975,7 +6967,7 @@ ; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) ret <2 x i64> %0 } @@ -6993,7 +6985,7 @@ ; X64-NEXT: vprolvq %xmm2, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W @@ -7014,7 +7006,7 @@ ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7027,7 +7019,7 @@ ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) ret <4 x i64> %0 } @@ -7045,7 +7037,7 @@ ; X64-NEXT: vprolvq %ymm2, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W @@ -7066,7 +7058,7 @@ ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7080,13 +7072,11 @@ ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) %2 = bitcast <4 x i32> %1 to <2 x i64> ret <2 x i64> %2 } -declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32) - define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi32: ; X86: # %bb.0: # %entry @@ -7102,7 +7092,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) %2 = bitcast <2 x i64> %__W to <4 x i32> %3 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x 
i32> @@ -7126,7 +7116,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> ) %2 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer @@ -7141,13 +7131,11 @@ ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) %2 = bitcast <8 x i32> %1 to <4 x i64> ret <4 x i64> %2 } -declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32) - define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi32: ; X86: # %bb.0: # %entry @@ -7163,7 +7151,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) %2 = bitcast <4 x i64> %__W to <8 x i32> %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 @@ -7186,7 +7174,7 @@ ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> ) %2 = bitcast i8 %__U to <8 x i1> %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer %4 = bitcast <8 x i32> %3 to <4 x i64> @@ -7199,12 +7187,10 @@ ; CHECK-NEXT: vprorq $5, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) ret <2 x i64> %0 } -declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32) - define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi64: ; X86: # %bb.0: # %entry @@ -7219,7 +7205,7 @@ ; X64-NEXT: vprorq $5, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W @@ -7240,7 +7226,7 @@ ; X64-NEXT: vprorq $5, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7253,12 +7239,10 @@ ; CHECK-NEXT: vprorq $5, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) ret <4 x i64> %0 } -declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32) - define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi64: ; X86: # %bb.0: # 
%entry @@ -7273,7 +7257,7 @@ ; X64-NEXT: vprorq $5, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W @@ -7294,7 +7278,7 @@ ; X64-NEXT: vprorq $5, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> ) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7309,7 +7293,7 @@ entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <4 x i32> %2 to <2 x i64> ret <2 x i64> %3 } @@ -7330,7 +7314,7 @@ entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <2 x i64> %__W to <4 x i32> %4 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> @@ -7355,7 +7339,7 @@ entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer @@ -7371,7 +7355,7 @@ entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <8 x i32> %2 to <4 x i64> ret <4 x i64> %3 } @@ -7392,7 +7376,7 @@ entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <4 x i64> %__W to <8 x i32> %4 = bitcast i8 %__U to <8 x i1> %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 @@ -7416,7 +7400,7 @@ entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer %5 = bitcast <8 x i32> %4 to <4 x i64> @@ -7429,7 +7413,7 @@ ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) ret <2 x i64> %0 } @@ -7447,7 +7431,7 @@ ; X64-NEXT: 
vprorvq %xmm2, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W @@ -7468,7 +7452,7 @@ ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7481,7 +7465,7 @@ ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) ret <4 x i64> %0 } @@ -7499,7 +7483,7 @@ ; X64-NEXT: vprorvq %ymm2, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W @@ -7520,7 +7504,7 @@ ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7572,13 +7556,13 @@ declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>) declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>) declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>) -declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>) -declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>) -declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>) -declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>) -declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>) -declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>) -declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>) -declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) +declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) !0 = !{i32 1} Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -12592,6 +12592,590 @@ ret <4 x i64> %res4 } +declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx512_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_d_128: +; X86: # %bb.0: +; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] +; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] +; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_d_128: +; X64: # %bb.0: +; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] +; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] +; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 + %4 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer + %7 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %res3 = add <4 x i32> %3, %6 + %res4 = add <4 x i32> %res3, %7 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx512_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_d_256: +; X86: # %bb.0: +; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] +; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] +; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] +; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_d_256: +; X64: # %bb.0: +; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 
+; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] +; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] +; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 + %4 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer + %7 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %res3 = add <8 x i32> %3, %6 + %res4 = add <8 x i32> %res3, %7 + ret <8 x i32> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>) + +define <2 x i64>@test_int_x86_avx512_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_q_128: +; X86: # %bb.0: +; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] +; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] +; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] +; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_q_128: +; X64: # %bb.0: +; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] +; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] +; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] +; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> + %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 + %4 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> + %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer + %7 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %res3 = add <2 x i64> %3, %6 + %res4 = add <2 x i64> %res3, %7 + ret <2 x i64> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>) + +define <4 x i64>@test_int_x86_avx512_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prorv_q_256: +; X86: # %bb.0: +; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 
{%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] +; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] +; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] +; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prorv_q_256: +; X64: # %bb.0: +; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] +; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] +; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] +; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 + %4 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer + %7 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %res3 = add <4 x i64> %3, %6 + %res4 = add <4 x i64> %res3, %7 + ret <4 x i64> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32) + +define <4 x i32>@test_int_x86_avx512_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_d_128: +; X86: # %bb.0: +; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] +; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03] +; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_d_128: +; X64: # %bb.0: +; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] +; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03] +; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 + %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = 
shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer + %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) + %res3 = add <4 x i32> %3, %6 + %res4 = add <4 x i32> %res3, %7 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32) + +define <8 x i32>@test_int_x86_avx512_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_d_256: +; X86: # %bb.0: +; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] +; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03] +; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_d_256: +; X64: # %bb.0: +; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] +; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03] +; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 + %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer + %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) + %res3 = add <8 x i32> %3, %6 + %res4 = add <8 x i32> %res3, %7 + ret <8 x i32> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32) + +define <2 x i64>@test_int_x86_avx512_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_q_128: +; X86: # %bb.0: +; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] +; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03] +; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_q_128: +; X64: # %bb.0: +; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] +; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03] +; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> + %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 + %4 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> + %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer + %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) + %res3 = add <2 x i64> %3, %6 + %res4 = add <2 x i64> %res3, %7 + ret <2 x i64> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32) + +define <4 x i64>@test_int_x86_avx512_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prol_q_256: +; X86: # %bb.0: +; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] +; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03] +; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prol_q_256: +; X64: # %bb.0: +; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] +; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03] +; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 + %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer + %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) + %res3 = add <4 x i64> %3, %6 + %res4 = add <4 x i64> %res3, %7 + ret <4 x i64> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx512_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_d_128: +; X86: # %bb.0: +; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolvd 
%xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] +; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] +; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_d_128: +; X64: # %bb.0: +; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] +; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] +; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 + %4 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer + %7 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) + %res3 = add <4 x i32> %3, %6 + %res4 = add <4 x i32> %res3, %7 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx512_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_d_256: +; X86: # %bb.0: +; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] +; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] +; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] +; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_d_256: +; X64: # %bb.0: +; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] +; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] +; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] +; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 + %4 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i32> %4, <8 x 
i32> zeroinitializer + %7 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) + %res3 = add <8 x i32> %3, %6 + %res4 = add <8 x i32> %res3, %7 + ret <8 x i32> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>) + +define <2 x i64>@test_int_x86_avx512_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_q_128: +; X86: # %bb.0: +; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] +; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] +; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] +; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_q_128: +; X64: # %bb.0: +; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] +; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] +; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] +; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> + %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 + %4 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> + %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer + %7 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) + %res3 = add <2 x i64> %3, %6 + %res4 = add <2 x i64> %res3, %7 + ret <2 x i64> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>) + +define <4 x i64>@test_int_x86_avx512_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_prolv_q_256: +; X86: # %bb.0: +; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] +; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] +; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] +; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_prolv_q_256: +; X64: # %bb.0: +; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: 
[0x62,0xf2,0xfd,0x29,0x15,0xd1] +; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] +; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] +; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 + %4 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer + %7 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) + %res3 = add <4 x i64> %3, %6 + %res4 = add <4 x i64> %res3, %7 + ret <4 x i64> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32) + +define <4 x i32>@test_int_x86_avx512_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_d_128: +; X86: # %bb.0: +; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] +; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03] +; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_d_128: +; X64: # %bb.0: +; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] +; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03] +; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 + %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer + %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) + %res3 = add <4 x i32> %3, %6 + %res4 = add <4 x i32> %res3, %7 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32) + +define <8 x i32>@test_int_x86_avx512_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_d_256: +; X86: # %bb.0: +; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] +; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03] +; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_d_256: +; X64: # %bb.0: +; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] +; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03] +; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 + %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer + %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) + %res3 = add <8 x i32> %3, %6 + %res4 = add <8 x i32> %res3, %7 + ret <8 x i32> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32) + +define <2 x i64>@test_int_x86_avx512_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_q_128: +; X86: # %bb.0: +; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] +; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03] +; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_q_128: +; X64: # %bb.0: +; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] +; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03] +; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> + %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 + %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x 
i32> + %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer + %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) + %res3 = add <2 x i64> %3, %6 + %res4 = add <2 x i64> %res3, %7 + ret <2 x i64> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32) + +define <4 x i64>@test_int_x86_avx512_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { +; X86-LABEL: test_int_x86_avx512_pror_q_256: +; X86: # %bb.0: +; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] +; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03] +; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_pror_q_256: +; X64: # %bb.0: +; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] +; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03] +; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 + %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) + %5 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> + %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer + %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) + %res3 = add <4 x i64> %3, %6 + %res4 = add <4 x i64> %res3, %7 + ret <4 x i64> %res4 +} + declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4725,590 +4725,6 @@ declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone -declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>) - -define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128: -; X86: # %bb.0: -; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; 
X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] -; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] -; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128: -; X64: # %bb.0: -; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] -; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1] -; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 - %4 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer - %7 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %res3 = add <4 x i32> %3, %6 - %res4 = add <4 x i32> %res3, %7 - ret <4 x i32> %res4 -} - -declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>) - -define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256: -; X86: # %bb.0: -; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] -; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] -; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256: -; X64: # %bb.0: -; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] -; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1] -; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 - %4 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = 
select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %7 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %res3 = add <8 x i32> %3, %6 - %res4 = add <8 x i32> %res3, %7 - ret <8 x i32> %res4 -} - -declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>) - -define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128: -; X86: # %bb.0: -; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] -; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] -; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128: -; X64: # %bb.0: -; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] -; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1] -; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> - %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 - %4 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> - %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer - %7 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %res3 = add <2 x i64> %3, %6 - %res4 = add <2 x i64> %res3, %7 - ret <2 x i64> %res4 -} - -declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>) - -define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256: -; X86: # %bb.0: -; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] -; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] -; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256: -; X64: # %bb.0: -; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] -; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1] -; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 - %4 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer - %7 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %res3 = add <4 x i64> %3, %6 - %res4 = add <4 x i64> %res3, %7 - ret <4 x i64> %res4 -} - -declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32) - -define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prol_d_128: -; X86: # %bb.0: -; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03] -; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prol_d_128: -; X64: # %bb.0: -; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03] -; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 - %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer - %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3) - %res3 = add <4 x i32> %3, %6 - %res4 = add <4 x i32> %res3, %7 - ret <4 x i32> %res4 -} - -declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32) - -define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prol_d_256: -; X86: # %bb.0: -; X86-NEXT: vprold $3, %ymm0, %ymm2 # 
encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03] -; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prol_d_256: -; X64: # %bb.0: -; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03] -; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 - %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) - %res3 = add <8 x i32> %3, %6 - %res4 = add <8 x i32> %res3, %7 - ret <8 x i32> %res4 -} - -declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32) - -define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prol_q_128: -; X86: # %bb.0: -; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03] -; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prol_q_128: -; X64: # %bb.0: -; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03] -; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> - %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 - %4 = call <2 x i64> 
@llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> - %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer - %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) - %res3 = add <2 x i64> %3, %6 - %res4 = add <2 x i64> %res3, %7 - ret <2 x i64> %res4 -} - -declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32) - -define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prol_q_256: -; X86: # %bb.0: -; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03] -; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prol_q_256: -; X64: # %bb.0: -; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03] -; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 - %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer - %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) - %res3 = add <4 x i64> %3, %6 - %res4 = add <4 x i64> %res3, %7 - ret <4 x i64> %res4 -} - -declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>) - -define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128: -; X86: # %bb.0: -; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] -; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] -; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128: -; X64: # %bb.0: -; X64-NEXT: vprolvd %xmm1, 
%xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] -; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1] -; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 - %4 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer - %7 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1) - %res3 = add <4 x i32> %3, %6 - %res4 = add <4 x i32> %res3, %7 - ret <4 x i32> %res4 -} - -declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>) - -define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256: -; X86: # %bb.0: -; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] -; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] -; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256: -; X64: # %bb.0: -; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] -; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1] -; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 - %4 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %7 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1) - %res3 = add <8 x i32> %3, %6 - %res4 = add <8 x i32> %res3, %7 - ret <8 x i32> %res4 -} - -declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>) - -define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128: -; X86: # %bb.0: -; X86-NEXT: vprolvq %xmm1, %xmm0, 
%xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] -; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] -; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128: -; X64: # %bb.0: -; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] -; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1] -; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> - %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 - %4 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> - %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer - %7 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1) - %res3 = add <2 x i64> %3, %6 - %res4 = add <2 x i64> %res3, %7 - ret <2 x i64> %res4 -} - -declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>) - -define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256: -; X86: # %bb.0: -; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] -; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] -; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256: -; X64: # %bb.0: -; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9] -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] -; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1] -; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %2 = bitcast 
i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 - %4 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer - %7 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1) - %res3 = add <4 x i64> %3, %6 - %res4 = add <4 x i64> %res3, %7 - ret <4 x i64> %res4 -} - -declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32) - -define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_pror_d_128: -; X86: # %bb.0: -; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03] -; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_pror_d_128: -; X64: # %bb.0: -; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03] -; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2 - %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer - %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) - %res3 = add <4 x i32> %3, %6 - %res4 = add <4 x i32> %res3, %7 - ret <4 x i32> %res4 -} - -declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32) - -define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_pror_d_256: -; X86: # %bb.0: -; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03] -; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf5,0xfe,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_pror_d_256: -; X64: # %bb.0: -; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03] -; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 - %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) - %res3 = add <8 x i32> %3, %6 - %res4 = add <8 x i32> %res3, %7 - ret <8 x i32> %res4 -} - -declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32) - -define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_pror_q_128: -; X86: # %bb.0: -; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03] -; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_pror_q_128: -; X64: # %bb.0: -; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03] -; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> - %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2 - %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> - %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer - %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) - %res3 = add <2 x i64> %3, %6 - %res4 = add <2 x i64> %res3, %7 - ret <2 x i64> %res4 -} - -declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32) - -define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { -; 
X86-LABEL: test_int_x86_avx512_mask_pror_q_256: -; X86: # %bb.0: -; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03] -; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_int_x86_avx512_mask_pror_q_256: -; X64: # %bb.0: -; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03] -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03] -; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) - %2 = bitcast i8 %x3 to <8 x i1> - %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2 - %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) - %5 = bitcast i8 %x3 to <8 x i1> - %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> - %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer - %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) - %res3 = add <4 x i64> %3, %6 - %res4 = add <4 x i64> %res3, %7 - ret <4 x i64> %res4 -} - declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { Index: llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll @@ -388,11 +388,11 @@ ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1) + %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %arg0, <16 x i8> %arg0, <16 x i8> %arg1) %bc = bitcast <16 x i8> %res to <2 x i64> ret <2 x i64> %bc } -declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone +declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_rot_epi16: @@ -401,11 +401,11 @@ ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1) + %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %arg0, <8 x i16> %arg0, <8 x i16> %arg1) %bc = bitcast <8 x i16> %res to <2 x i64> ret <2 x i64> %bc } -declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> 
@llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_rot_epi32: @@ -414,21 +414,21 @@ ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1) + %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %arg0, <4 x i32> %arg0, <4 x i32> %arg1) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc } -declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_rot_epi64: ; ALL: # %bb.0: ; ALL-NEXT: vprotq %xmm1, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} - %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) + %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> %a1) ret <2 x i64> %res } -declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) { ; ALL-LABEL: test_mm_roti_epi8: @@ -436,45 +436,41 @@ ; ALL-NEXT: vprotb $1, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1) + %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %arg0, <16 x i8> %arg0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) %bc = bitcast <16 x i8> %res to <2 x i64> ret <2 x i64> %bc } -declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) { ; ALL-LABEL: test_mm_roti_epi16: ; ALL: # %bb.0: -; ALL-NEXT: vprotw $50, %xmm0, %xmm0 +; ALL-NEXT: vprotw $2, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50) + %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %arg0, <8 x i16> %arg0, <8 x i16> <i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50>) %bc = bitcast <8 x i16> %res to <2 x i64> ret <2 x i64> %bc } -declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) { ; ALL-LABEL: test_mm_roti_epi32: ; ALL: # %bb.0: -; ALL-NEXT: vprotd $226, %xmm0, %xmm0 +; ALL-NEXT: vprotd $2, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> - %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30) + %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %arg0, <4 x i32> %arg0, <4 x i32> <i32 -30, i32 -30, i32 -30, i32 -30>) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc } -declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) { ; ALL-LABEL: test_mm_roti_epi64: ; ALL: # %bb.0: -; ALL-NEXT: vprotq $100, %xmm0, %xmm0 +; ALL-NEXT: vprotq $36, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} - %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100) + %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 100, i64 100>) ret <2 x i64> %res } -declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_shl_epi8: @@ -773,20 +769,3 @@ ret <4 x double> %res } declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone - - - - - - - - - - - - - - - - - Index:
llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll @@ -764,3 +764,82 @@ } declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone +define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: test_int_x86_xop_vprotb: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: test_int_x86_xop_vprotd: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) { +; CHECK-LABEL: test_int_x86_xop_vprotq: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: test_int_x86_xop_vprotw: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) { +; CHECK-LABEL: test_int_x86_xop_vprotbi: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotb $1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) { +; CHECK-LABEL: test_int_x86_xop_vprotdi: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotd $30, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) { +; CHECK-LABEL: test_int_x86_xop_vprotqi: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotq $3, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) { +; CHECK-LABEL: test_int_x86_xop_vprotwi: +; CHECK: # %bb.0: +; CHECK-NEXT: vprotw $12, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone Index: llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll +++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll @@ -450,86 
+450,6 @@ } declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) { -; CHECK-LABEL: test_int_x86_xop_vprotb: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotb %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ; - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: test_int_x86_xop_vprotd: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ; - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone - -define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) { -; CHECK-LABEL: test_int_x86_xop_vprotq: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotq %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ; - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone - -define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: test_int_x86_xop_vprotw: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ; - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) { -; CHECK-LABEL: test_int_x86_xop_vprotbi: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotb $1, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ; - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone - -define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) { -; CHECK-LABEL: test_int_x86_xop_vprotdi: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotd $254, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ; - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone - -define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) { -; CHECK-LABEL: test_int_x86_xop_vprotqi: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotq $3, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ; - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone - -define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) { -; CHECK-LABEL: test_int_x86_xop_vprotwi: -; CHECK: # %bb.0: -; CHECK-NEXT: vprotw $252, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ; - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone - define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpshab: ; CHECK: # %bb.0: