Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -902,10 +902,11 @@
   auto *VecTy = cast<VectorType>(II.getType());
   auto *MaskEltTy = Type::getInt32Ty(II.getContext());
   unsigned Size = VecTy->getNumElements();
-  assert(Size == 8 && "Unexpected shuffle mask size");
+  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
+         "Unexpected shuffle mask size");
 
   // Construct a shuffle mask from constant integers or UNDEFs.
-  Constant *Indexes[8] = {nullptr};
+  Constant *Indexes[64] = {nullptr};
 
   for (unsigned I = 0; I < Size; ++I) {
     Constant *COp = V->getAggregateElement(I);
@@ -917,8 +918,8 @@
       continue;
     }
 
-    APInt Index = cast<ConstantInt>(COp)->getValue();
-    Index = Index.zextOrTrunc(32).getLoBits(3);
+    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
+    Index &= Size - 1;
     Indexes[I] = ConstantInt::get(MaskEltTy, Index);
   }
 
@@ -1033,6 +1034,29 @@
   return nullptr;
 }
 
+// Emit a select instruction and appropriate bitcasts to help simplify
+// masked intrinsics.
+static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
+                                InstCombiner::BuilderTy &Builder) {
+  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
+                         cast<IntegerType>(Mask->getType())->getBitWidth());
+  Mask = Builder.CreateBitCast(Mask, MaskTy);
+
+  // If we have less than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  unsigned VWidth = Op0->getType()->getVectorNumElements();
+  if (VWidth < 8) {
+    uint32_t Indices[4];
+    for (unsigned i = 0; i != VWidth; ++i)
+      Indices[i] = i;
+    Mask = Builder.CreateShuffleVector(Mask, Mask,
+                                       makeArrayRef(Indices, VWidth),
+                                       "extract");
+  }
+
+  return Builder.CreateSelect(Mask, Op0, Op1);
+}
+
 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
   Value *Arg0 = II.getArgOperand(0);
   Value *Arg1 = II.getArgOperand(1);
@@ -2121,6 +2145,28 @@
       return replaceInstUsesWith(*II, V);
     break;
 
+  case Intrinsic::x86_avx512_mask_permvar_df_256:
+  case Intrinsic::x86_avx512_mask_permvar_df_512:
+  case Intrinsic::x86_avx512_mask_permvar_di_256:
+  case Intrinsic::x86_avx512_mask_permvar_di_512:
+  case Intrinsic::x86_avx512_mask_permvar_hi_128:
+  case Intrinsic::x86_avx512_mask_permvar_hi_256:
+  case Intrinsic::x86_avx512_mask_permvar_hi_512:
+  case Intrinsic::x86_avx512_mask_permvar_qi_128:
+  case Intrinsic::x86_avx512_mask_permvar_qi_256:
+  case Intrinsic::x86_avx512_mask_permvar_qi_512:
+  case Intrinsic::x86_avx512_mask_permvar_sf_256:
+  case Intrinsic::x86_avx512_mask_permvar_sf_512:
+  case Intrinsic::x86_avx512_mask_permvar_si_256:
+  case Intrinsic::x86_avx512_mask_permvar_si_512:
+    if (Value *V = simplifyX86vpermv(*II, *Builder)) {
+      // We simplified the permuting, now create a select for the masking.
+ V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), + *Builder); + return replaceInstUsesWith(*II, V); + } + break; + case Intrinsic::x86_avx_vperm2f128_pd_256: case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_si_256: Index: llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll @@ -1049,3 +1049,1131 @@ %3 = extractelement <2 x double> %2, i32 1 ret double %3 } + +declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @identity_test_permvar_si_256( +; CHECK-NEXT: ret <8 x i32> %a0 +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> undef, i8 -1) + ret <8 x i32> %a +} + +define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> %a0, <8 x i32> %passthru +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %a +} + +define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @zero_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> undef, i8 -1) + ret <8 x i32> %a +} + +define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: 
@zero_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %a +} + +define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> undef, i8 -1) + ret <8 x i32> %a +} + +define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %a +} + +define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @undef_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> undef, i8 -1) + ret <8 x i32> %a +} + +define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> , <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %a +} + +declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) + +define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @identity_test_permvar_sf_256( +; CHECK-NEXT: ret <8 x float> %a0 +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> undef, i8 -1) + ret <8 x float> %a +} + +define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_sf_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> %a0, <8 x float> %passthru +; CHECK-NEXT: ret <8 x float> [[TMP2]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> %passthru, i8 %mask) + ret <8 x float> %a +} + +define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @zero_test_permvar_sf_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> undef, i8 -1) + ret <8 x float> %a +} + +define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_sf_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer +; 
CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> %passthru, i8 %mask) + ret <8 x float> %a +} + +define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_sf_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> undef, i8 -1) + ret <8 x float> %a +} + +define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> %passthru, i8 %mask) + ret <8 x float> %a +} + +define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @undef_test_permvar_sf_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> undef, i8 -1) + ret <8 x float> %a +} + +define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_sf_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> 
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; + %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> , <8 x float> %passthru, i8 %mask) + ret <8 x float> %a +} + +declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) + +define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) { +; CHECK-LABEL: @identity_test_permvar_di_256( +; CHECK-NEXT: ret <4 x i64> %a0 +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> undef, i8 -1) + ret <4 x i64> %a +} + +define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_di_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> %a0, <4 x i64> %passthru +; CHECK-NEXT: ret <4 x i64> [[TMP3]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %a +} + +define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) { +; CHECK-LABEL: @zero_test_permvar_di_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> undef, i8 -1) + ret <4 x i64> %a +} + +define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_di_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = 
bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru +; CHECK-NEXT: ret <4 x i64> [[TMP4]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %a +} + +define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_di_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> undef, i8 -1) + ret <4 x i64> %a +} + +define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_di_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru +; CHECK-NEXT: ret <4 x i64> [[TMP4]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %a +} + +define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { +; CHECK-LABEL: @undef_test_permvar_di_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> undef, i8 -1) + ret <4 x i64> %a +} + +define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_di_256_mask( +; CHECK-NEXT: 
[[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru +; CHECK-NEXT: ret <4 x i64> [[TMP4]] +; + %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> , <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %a +} + +declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) + +define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) { +; CHECK-LABEL: @identity_test_permvar_df_256( +; CHECK-NEXT: ret <4 x double> %a0 +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> undef, i8 -1) + ret <4 x double> %a +} + +define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_df_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> %a0, <4 x double> %passthru +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> %passthru, i8 %mask) + ret <4 x double> %a +} + +define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) { +; CHECK-LABEL: @zero_test_permvar_df_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> undef, i8 -1) + ret <4 x double> %a +} + +define <4 x double> 
@zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_df_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru +; CHECK-NEXT: ret <4 x double> [[TMP4]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> %passthru, i8 %mask) + ret <4 x double> %a +} + +define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_df_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> undef, i8 -1) + ret <4 x double> %a +} + +define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_df_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru +; CHECK-NEXT: ret <4 x double> [[TMP4]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> %passthru, i8 %mask) + ret <4 x double> %a +} + +define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { +; CHECK-LABEL: @undef_test_permvar_df_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x 
double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> undef, i8 -1) + ret <4 x double> %a +} + +define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_df_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru +; CHECK-NEXT: ret <4 x double> [[TMP4]] +; + %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> , <4 x double> %passthru, i8 %mask) + ret <4 x double> %a +} + +declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) { +; CHECK-LABEL: @identity_test_permvar_si_512( +; CHECK-NEXT: ret <16 x i32> %a0 +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> undef, i16 -1) + ret <16 x i32> %a +} + +define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { +; CHECK-LABEL: @identity_test_permvar_si_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> %a0, <16 x i32> %passthru +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %a +} + +define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) { +; CHECK-LABEL: @zero_test_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> 
undef, <16 x i32> zeroinitializer +; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1) + ret <16 x i32> %a +} + +define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { +; CHECK-LABEL: @zero_test_permvar_si_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %a +} + +define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> undef, i16 -1) + ret <16 x i32> %a +} + +define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_si_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %a +} + +define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { +; CHECK-LABEL: @undef_test_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector 
<16 x i32> %a0, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> undef, i16 -1) + ret <16 x i32> %a +} + +define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { +; CHECK-LABEL: @undef_test_permvar_si_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> , <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %a +} + +declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) { +; CHECK-LABEL: @identity_test_permvar_sf_512( +; CHECK-NEXT: ret <16 x float> %a0 +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> undef, i16 -1) + ret <16 x float> %a +} + +define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { +; CHECK-LABEL: @identity_test_permvar_sf_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> %a0, <16 x float> %passthru +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> %passthru, i16 %mask) + ret <16 x float> %a +} + +define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) { +; CHECK-LABEL: @zero_test_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> zeroinitializer +; 
CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> undef, i16 -1) + ret <16 x float> %a +} + +define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { +; CHECK-LABEL: @zero_test_permvar_sf_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru +; CHECK-NEXT: ret <16 x float> [[TMP3]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> %passthru, i16 %mask) + ret <16 x float> %a +} + +define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> undef, i16 -1) + ret <16 x float> %a +} + +define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru +; CHECK-NEXT: ret <16 x float> [[TMP3]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> %passthru, i16 %mask) + ret <16 x float> %a +} + +define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { +; CHECK-LABEL: @undef_test_permvar_sf_512( +; 
CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> undef, i16 -1) + ret <16 x float> %a +} + +define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { +; CHECK-LABEL: @undef_test_permvar_sf_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru +; CHECK-NEXT: ret <16 x float> [[TMP3]] +; + %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> , <16 x float> %passthru, i16 %mask) + ret <16 x float> %a +} + +declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) { +; CHECK-LABEL: @identity_test_permvar_di_512( +; CHECK-NEXT: ret <8 x i64> %a0 +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> undef, i8 -1) + ret <8 x i64> %a +} + +define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_di_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> %a0, <8 x i64> %passthru +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %a +} + +define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) { +; CHECK-LABEL: @zero_test_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: 
ret <8 x i64> [[TMP1]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1) + ret <8 x i64> %a +} + +define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_di_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %a +} + +define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> undef, i8 -1) + ret <8 x i64> %a +} + +define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_di_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %a +} + +define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { +; CHECK-LABEL: @undef_test_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %a = tail call <8 x i64> 
@llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> undef, i8 -1) + ret <8 x i64> %a +} + +define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_di_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> , <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %a +} + +declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) + +define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) { +; CHECK-LABEL: @identity_test_permvar_df_512( +; CHECK-NEXT: ret <8 x double> %a0 +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> undef, i8 -1) + ret <8 x double> %a +} + +define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_df_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> %a0, <8 x double> %passthru +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> %passthru, i8 %mask) + ret <8 x double> %a +} + +define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) { +; CHECK-LABEL: @zero_test_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x 
double> undef, i8 -1) + ret <8 x double> %a +} + +define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_df_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> %passthru, i8 %mask) + ret <8 x double> %a +} + +define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> undef, i8 -1) + ret <8 x double> %a +} + +define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_df_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> %passthru, i8 %mask) + ret <8 x double> %a +} + +define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { +; CHECK-LABEL: @undef_test_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> 
@llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> undef, i8 -1) + ret <8 x double> %a +} + +define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_df_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> , <8 x double> %passthru, i8 %mask) + ret <8 x double> %a +} + +declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) { +; CHECK-LABEL: @identity_test_permvar_hi_128( +; CHECK-NEXT: ret <8 x i16> %a0 +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> undef, i8 -1) + ret <8 x i16> %a +} + +define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_hi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> %a0, <8 x i16> %passthru +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %a +} + +define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) { +; CHECK-LABEL: @zero_test_permvar_hi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> undef, i8 -1) + ret <8 x i16> %a 
+} + +define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_hi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %a +} + +define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_hi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> undef, i8 -1) + ret <8 x i16> %a +} + +define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %a +} + +define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { +; CHECK-LABEL: @undef_test_permvar_hi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> undef, i8 -1) + ret <8 x i16> %a +} + +define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> 
%a0, <8 x i16> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_hi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> , <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %a +} + +declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) { +; CHECK-LABEL: @identity_test_permvar_hi_256( +; CHECK-NEXT: ret <16 x i16> %a0 +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> undef, i16 -1) + ret <16 x i16> %a +} + +define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { +; CHECK-LABEL: @identity_test_permvar_hi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> %a0, <16 x i16> %passthru +; CHECK-NEXT: ret <16 x i16> [[TMP2]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %a +} + +define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) { +; CHECK-LABEL: @zero_test_permvar_hi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> undef, i16 -1) + ret <16 x i16> %a +} + +define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { +; CHECK-LABEL: @zero_test_permvar_hi_256_mask( +; CHECK-NEXT: 
[[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %a +} + +define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_hi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> undef, i16 -1) + ret <16 x i16> %a +} + +define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %a +} + +define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { +; CHECK-LABEL: @undef_test_permvar_hi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> undef, i16 -1) + ret <16 x i16> %a +} + +define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { +; CHECK-LABEL: @undef_test_permvar_hi_256_mask( +; CHECK-NEXT: 
[[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; + %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> , <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %a +} + +declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) { +; CHECK-LABEL: @identity_test_permvar_hi_512( +; CHECK-NEXT: ret <32 x i16> %a0 +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> undef, i32 -1) + ret <32 x i16> %a +} + +define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { +; CHECK-LABEL: @identity_test_permvar_hi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> %a0, <32 x i16> %passthru +; CHECK-NEXT: ret <32 x i16> [[TMP2]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %a +} + +define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) { +; CHECK-LABEL: @zero_test_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1) + ret <32 x i16> %a +} + +define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { +; CHECK-LABEL: @zero_test_permvar_hi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> zeroinitializer +; 
CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %a +} + +define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> undef, i32 -1) + ret <32 x i16> %a +} + +define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %a +} + +define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { +; CHECK-LABEL: @undef_test_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> undef, i32 -1) + ret <32 x i16> %a +} + +define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { +; CHECK-LABEL: @undef_test_permvar_hi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: 
[[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> , <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %a +} + +declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) { +; CHECK-LABEL: @identity_test_permvar_qi_128( +; CHECK-NEXT: ret <16 x i8> %a0 +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> undef, i16 -1) + ret <16 x i8> %a +} + +define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: @identity_test_permvar_qi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> %a0, <16 x i8> %passthru +; CHECK-NEXT: ret <16 x i8> [[TMP2]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %a +} + +define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) { +; CHECK-LABEL: @zero_test_permvar_qi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> undef, i16 -1) + ret <16 x i8> %a +} + +define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: @zero_test_permvar_qi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> 
[[TMP1]], <16 x i8> %passthru +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %a +} + +define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_qi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> undef, i16 -1) + ret <16 x i8> %a +} + +define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> %passthru +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %a +} + +define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { +; CHECK-LABEL: @undef_test_permvar_qi_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> undef, i16 -1) + ret <16 x i8> %a +} + +define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: @undef_test_permvar_qi_128_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> %passthru +; CHECK-NEXT: ret <16 x i8> 
[[TMP3]] +; + %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> , <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %a +} + +declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) { +; CHECK-LABEL: @identity_test_permvar_qi_256( +; CHECK-NEXT: ret <32 x i8> %a0 +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> undef, i32 -1) + ret <32 x i8> %a +} + +define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { +; CHECK-LABEL: @identity_test_permvar_qi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> %a0, <32 x i8> %passthru +; CHECK-NEXT: ret <32 x i8> [[TMP2]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> %passthru, i32 %mask) + ret <32 x i8> %a +} + +define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) { +; CHECK-LABEL: @zero_test_permvar_qi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> undef, i32 -1) + ret <32 x i8> %a +} + +define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { +; CHECK-LABEL: @zero_test_permvar_qi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru +; CHECK-NEXT: ret <32 x i8> [[TMP3]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 
x i8> %passthru, i32 %mask) + ret <32 x i8> %a +} + +define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_qi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> undef, i32 -1) + ret <32 x i8> %a +} + +define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru +; CHECK-NEXT: ret <32 x i8> [[TMP3]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> %passthru, i32 %mask) + ret <32 x i8> %a +} + +define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { +; CHECK-LABEL: @undef_test_permvar_qi_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> undef, i32 -1) + ret <32 x i8> %a +} + +define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { +; CHECK-LABEL: @undef_test_permvar_qi_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru +; CHECK-NEXT: ret <32 x i8> [[TMP3]] +; + %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> , <32 x i8> %passthru, i32 %mask) + ret <32 x i8> %a +} + +declare <64 x i8> 
@llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) { +; CHECK-LABEL: @identity_test_permvar_qi_512( +; CHECK-NEXT: ret <64 x i8> %a0 +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> undef, i64 -1) + ret <64 x i8> %a +} + +define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { +; CHECK-LABEL: @identity_test_permvar_qi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 %mask to <64 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> %a0, <64 x i8> %passthru +; CHECK-NEXT: ret <64 x i8> [[TMP2]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> %passthru, i64 %mask) + ret <64 x i8> %a +} + +define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) { +; CHECK-LABEL: @zero_test_permvar_qi_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> undef, i64 -1) + ret <64 x i8> %a +} + +define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { +; CHECK-LABEL: @zero_test_permvar_qi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> %passthru, i64 %mask) + ret <64 x i8> %a +} + +define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_qi_512( +; CHECK-NEXT: 
[[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> undef, i64 -1) + ret <64 x i8> %a +} + +define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> %passthru, i64 %mask) + ret <64 x i8> %a +} + +define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { +; CHECK-LABEL: @undef_test_permvar_qi_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> undef, i64 -1) + ret <64 x i8> %a +} + +define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { +; CHECK-LABEL: @undef_test_permvar_qi_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> , <64 x i8> %passthru, i64 %mask) + ret <64 x i8> %a +}