Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -4816,3 +4816,41 @@ def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">, Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; } + +let TargetPrefix = "x86" in { + def int_x86_avx512bf16_cvtne2ps2bf16_128: + GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_avx512bf16_cvtne2ps2bf16_256: + GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_avx512bf16_cvtne2ps2bf16_512: + GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, + Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], + [IntrNoMem]>; + // Intrinsic must be masked due to it producing less than 128 bits of results. + def int_x86_avx512bf16_mask_cvtneps2bf16_128: + Intrinsic<[llvm_v8i16_ty], + [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty], + [IntrNoMem]>; + def int_x86_avx512bf16_cvtneps2bf16_256: + GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx512bf16_cvtneps2bf16_512: + GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>; + def int_x86_avx512bf16_dpbf16ps_128: + GCCBuiltin<"__builtin_ia32_dpbf16ps_128">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx512bf16_dpbf16ps_256: + GCCBuiltin<"__builtin_ia32_dpbf16ps_256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx512bf16_dpbf16ps_512: + GCCBuiltin<"__builtin_ia32_dpbf16ps_512">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; +} Index: lib/Support/Host.cpp 
=================================================================== --- lib/Support/Host.cpp +++ lib/Support/Host.cpp @@ -1375,6 +1375,9 @@ // detecting features using the "-march=native" flag. // For more info, see X86 ISA docs. Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); + bool HasLeaf7Subleaf1 = + MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -167,6 +167,9 @@ def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions", [FeatureAVX512]>; +def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true", + "Support bfloat16 floating point", + [FeatureBWI]>; def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms", [FeatureBWI]>; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -509,6 +509,13 @@ MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI, MCVTSI2P, MCVTUI2P, + // Vector float to bfloat16 + CVTNE2PS2BF16, CVTNEPS2BF16, DPBF16PS, + + // Masked version of above. + // SRC, PASSTHRU, MASK + MCVTNEPS2BF16, + // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. 
VASTART_SAVE_XMM_REGS, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -22516,6 +22516,21 @@ PassThru, Mask); } + case CVTNEPS2BF16_MASK: { + SDValue Src = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + + if (ISD::isBuildVectorAllOnes(Mask.getNode())) + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); + + // Break false dependency. + if (PassThru.isUndef()) + PassThru = DAG.getConstant(0, dl, PassThru.getValueType()); + + return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, + Mask); + } default: break; } @@ -27943,6 +27958,10 @@ case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI"; case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; + case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16"; + case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16"; + case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16"; + case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; case X86ISD::MGATHER: return "X86ISD::MGATHER"; case X86ISD::MSCATTER: return "X86ISD::MSCATTER"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -12635,3 +12635,143 @@ Sched<[SchedWriteFMA.ZMM.Folded]>; } +multiclass avx512_binop_all2 opc, string OpcodeStr, + X86SchedWriteWidths sched, + AVX512VLVectorVTInfo _SrcVTInfo, + AVX512VLVectorVTInfo _DstVTInfo, + SDNode OpNode, Predicate prd, + bit IsCommutable = 0> { + let Predicates = [prd] in + defm NAME#Z : avx512_binop_rm2, + EVEX_V512, EVEX_CD8<32, CD8VF>; + let Predicates = [HasVLX, prd] in { + defm NAME#Z256 : avx512_binop_rm2, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm NAME#Z128 : avx512_binop_rm2, + 
EVEX_V128, EVEX_CD8<32, CD8VF>; + } +} + +defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", + SchedWriteVecALU, + avx512vl_f32_info, avx512vl_i16_info, + X86cvtne2ps2bf16, HasBF16, 0>, T8XD; + +// Truncate Float to BFloat16 +multiclass avx512_cvtps2bf16 opc, string OpcodeStr, + X86SchedWriteWidths sched> { + let Predicates = [HasBF16] in { + defm Z : avx512_vcvt_fp, EVEX_V512; + } + let Predicates = [HasBF16, HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + + def : InstAlias(NAME # "Z128rr") VR128X:$dst, + VR128X:$src), 0>; + def : InstAlias(NAME # "Z128rm") VR128X:$dst, + f128mem:$src), 0, "intel">; + def : InstAlias(NAME # "Z256rr") VR128X:$dst, + VR256X:$src), 0>; + def : InstAlias(NAME # "Z256rm") VR128X:$dst, + f256mem:$src), 0, "intel">; + } +} + +defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", + SchedWriteCvtPD2PS>, T8XS, + EVEX_CD8<32, CD8VF>; + +let Predicates = [HasBF16, HasVLX] in { + // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction + // patterns have been disabled with null_frag. 
+ def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), + (VCVTNEPS2BF16Z128rr VR128X:$src)>; + def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0), + VK4WM:$mask), + (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; + def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV, + VK4WM:$mask), + (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; + + def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), + (VCVTNEPS2BF16Z128rm addr:$src)>; + def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0), + VK4WM:$mask), + (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; + def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV, + VK4WM:$mask), + (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; + + def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 + (X86VBroadcast (loadf32 addr:$src))))), + (VCVTNEPS2BF16Z128rmb addr:$src)>; + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + (v8i16 VR128X:$src0), VK4WM:$mask), + (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + v8i16x_info.ImmAllZerosV, VK4WM:$mask), + (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; +} + +let Constraints = "$src1 = $dst" in { +multiclass avx512_dpbf16ps_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo src_v> { + defm r: AVX512_maskable_3src, + EVEX_4V; + + defm m: AVX512_maskable_3src, EVEX_4V; + + defm mb: AVX512_maskable_3src, + EVEX_B, EVEX_4V; + +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_dpbf16ps_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _, + AVX512VLVectorVTInfo src_v, Predicate prd> { + let Predicates = [prd] in { + defm Z : avx512_dpbf16ps_rm, EVEX_V512; + } + let Predicates = [HasVLX, prd] in { + defm Z256 : avx512_dpbf16ps_rm, EVEX_V256; + defm Z128 : avx512_dpbf16ps_rm, EVEX_V128; + } +} + +defm VDPBF16PS : 
avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, + avx512vl_f32_info, avx512vl_i32_info, + HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -662,6 +662,25 @@ SDTCisOpSmallerThanOp<0, 1>, SDTCisVT<2, i32>]>>; +// cvt fp to bfloat16 +def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<1,2>]>>; +def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16", + SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>, + SDTCisSameAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<1, 3>]>>; +def X86cvtneps2bf16 : SDNode<"X86ISD::CVTNEPS2BF16", + SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>]>>; +def X86dpbf16ps : SDNode<"X86ISD::DPBF16PS", + SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, + SDTCisSameAs<0,1>, + SDTCVecEltisVT<2, i32>, + SDTCisSameAs<2,3>]>>; + // galois field arithmetic def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -835,6 +835,7 @@ def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; def HasVNNI : Predicate<"Subtarget->hasVNNI()">; +def HasBF16 : Predicate<"Subtarget->hasBF16()">; def HasBITALG : Predicate<"Subtarget->hasBITALG()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -19,6 +19,7 @@ namespace llvm { enum IntrinsicType : 
uint16_t { + CVTNEPS2BF16_MASK, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, INTR_TYPE_3OP_IMM8, @@ -977,6 +978,16 @@ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0), X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0), X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0), + // bfloat16 + X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_128, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0), + X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_256, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0), + X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_512, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0), + X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_256, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0), + X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_512, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0), + X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_128, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0), + X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0), + X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0), + X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16), X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0), Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -353,6 +353,9 @@ /// Processor has AVX-512 Vector Neural Network Instructions bool HasVNNI = false; + /// Processor has AVX-512 bfloat16 floating-point extensions + bool HasBF16 = false; + /// Processor has AVX-512 Bit Algorithms instructions bool HasBITALG = false; @@ -664,6 +667,7 @@ bool hasVLX()
const { return HasVLX; } bool hasPKU() const { return HasPKU; } bool hasVNNI() const { return HasVNNI; } + bool hasBF16() const { return HasBF16; } bool hasBITALG() const { return HasBITALG; } bool hasMPX() const { return HasMPX; } bool hasSHSTK() const { return HasSHSTK; } Index: test/CodeGen/X86/avx512bf16-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx512bf16-intrinsics.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 + +declare <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float>, <16 x float>) #3 + +define <8 x i64> @test_mm512_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm512_cvtne2ps2bf16_512: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0x72,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4 + %1 = bitcast <32 x i16> %0 to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @test_mm512_maskz_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B, i32 %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_maskz_cvtne2ps2bf16_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_maskz_cvtne2ps2bf16_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %zmm1, 
%zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4 + %1 = bitcast i32 %U to <32 x i1> + %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer + %3 = bitcast <32 x i16> %2 to <8 x i64> + ret <8 x i64> %3 +} + +define <8 x i64> @test_mm512_mask_cvtne2ps2bf16_512(<8 x i64> %C, i32 %U, <16 x float> %A, <16 x float> %B) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_mask_cvtne2ps2bf16_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_mask_cvtne2ps2bf16_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4 + %1 = bitcast <8 x i64> %C to <32 x i16> + %2 = bitcast i32 %U to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1 + %4 = bitcast <32 x i16> %3 to <8 x i64> + ret <8 x i64> %4 +} + +declare <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float>) #3 + +define <4 x i64> @test_mm512_cvtneps2bf16_512(<16 x float> %A) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm512_cvtneps2bf16_512: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtneps2bf16 %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x72,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4 + %1 = bitcast <16 x i16> %0 to <4 x i64> + ret <4 x i64> %1 +} + +define <4 x i64> @test_mm512_maskz_cvtneps2bf16_512(<16 x 
float> %A, i16 %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_maskz_cvtneps2bf16_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_maskz_cvtneps2bf16_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4 + %1 = bitcast i16 %U to <16 x i1> + %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer + %3 = bitcast <16 x i16> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @test_mm512_mask_cvtneps2bf16_512(<4 x i64> %C, i16 %U, <16 x float> %A) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_mask_cvtneps2bf16_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_mask_cvtneps2bf16_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4 + %1 = bitcast <4 x i64> %C to <16 x i16> + %2 = bitcast i16 %U to <16 x i1> + %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1 + %4 = bitcast <16 x i16> %3 to <4 x i64> + ret <4 x i64> %4 +} + +declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <16 x i32>, <16 x i32>) #3 + +define <16 x float> @test_mm512_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x 
i32> %B) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm512_dpbf16ps_512: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x52,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4 + ret <16 x float> %0 +} + +define <16 x float> @test_mm512_maskz_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B, i16 zeroext %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_maskz_dpbf16ps_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_maskz_dpbf16ps_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4 + %1 = bitcast i16 %U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + ret <16 x float> %2 +} +define <16 x float> @test_mm512_mask_dpbf16ps_512(i16 zeroext %U, <16 x float> %E, <16 x i32> %A, <16 x i32> %B) local_unnamed_addr #2 { +; X86-LABEL: test_mm512_mask_dpbf16ps_512: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_mask_dpbf16ps_512: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2] +; X64-NEXT: retq # 
encoding: [0xc3] +entry: + %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4 + %1 = bitcast i16 %U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %E + ret <16 x float> %2 +} Index: test/CodeGen/X86/avx512bf16-vl-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx512bf16-vl-intrinsics.ll @@ -0,0 +1,358 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 + +declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>) #1 + +define <2 x i64> @test_mm_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B) local_unnamed_addr #0 { +; CHECK-LABEL: test_mm_cvtne2ps2bf16_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7f,0x08,0x72,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2 + %1 = bitcast <8 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm_maskz_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B, i8 zeroext %U) local_unnamed_addr #0 { +; X86-LABEL: test_mm_maskz_cvtne2ps2bf16_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm_maskz_cvtne2ps2bf16_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2 + %1 = bitcast i8 %U to <8 x i1> + %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer + %3 = bitcast <8 x i16> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <2 x i64> @test_mm_mask_cvtne2ps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A, <4 x float> %B) local_unnamed_addr #0 { +; X86-LABEL: test_mm_mask_cvtne2ps2bf16_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm_mask_cvtne2ps2bf16_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2 + %1 = bitcast <2 x i64> %C to <8 x i16> + %2 = bitcast i8 %U to <8 x i1> + %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + %4 = bitcast <8 x i16> %3 to <2 x i64> + ret <2 x i64> %4 +} + +declare <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float>, <8 x float>) #3 + +define <4 x i64> @test_mm256_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B) local_unnamed_addr #1 { +; CHECK-LABEL: test_mm256_cvtne2ps2bf16_256: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7f,0x28,0x72,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x 
float> %A, <8 x float> %B) #4 + %1 = bitcast <16 x i16> %0 to <4 x i64> + ret <4 x i64> %1 +} + +define <4 x i64> @test_mm256_maskz_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B, i16 zeroext %U) local_unnamed_addr #1 { +; X86-LABEL: test_mm256_maskz_cvtne2ps2bf16_256: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_maskz_cvtne2ps2bf16_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4 + %1 = bitcast i16 %U to <16 x i1> + %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer + %3 = bitcast <16 x i16> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @test_mm256_mask_cvtne2ps2bf16_256(<4 x i64> %C, i16 zeroext %U, <8 x float> %A, <8 x float> %B) local_unnamed_addr #1 { +; X86-LABEL: test_mm256_mask_cvtne2ps2bf16_256: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_mask_cvtne2ps2bf16_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4 + %1 = bitcast <4 x i64> %C to <16 x i16> + %2 = bitcast i16 %U to <16 x i1> + %3 = select <16 
x i1> %2, <16 x i16> %0, <16 x i16> %1 + %4 = bitcast <16 x i16> %3 to <4 x i64> + ret <4 x i64> %4 +} + +declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>) #3 + +define <2 x i64> @test_mm256_cvtneps2bf16_256(<8 x float> %A) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm256_cvtneps2bf16_256: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4 + %1 = bitcast <8 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm256_maskz_cvtneps2bf16_256(<8 x float> %A, i8 zeroext %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm256_maskz_cvtneps2bf16_256: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_maskz_cvtneps2bf16_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4 + %1 = bitcast i8 %U to <8 x i1> + %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer + %3 = bitcast <8 x i16> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <2 x i64> @test_mm256_mask_cvtneps2bf16_256(<2 x i64> %C, i8 zeroext %U, <8 x float> %A) local_unnamed_addr #2 { +; X86-LABEL: test_mm256_mask_cvtneps2bf16_256: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_mask_cvtneps2bf16_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4 + %1 = bitcast <2 x i64> %C to <8 x i16> + %2 = bitcast i8 %U to <8 x i1> + %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1 + %4 = bitcast <8 x i16> %3 to <2 x i64> + ret <2 x i64> %4 +} + +declare <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float>, <8 x i16>, <4 x i1>) #3 + +define <2 x i64> @test_mm128_cvtneps2bf16_128(<4 x float> %A) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm128_cvtneps2bf16_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> ) #4 + %1 = bitcast <8 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm128_maskz_cvtneps2bf16_128(<4 x float> %A, i8 zeroext %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm128_maskz_cvtneps2bf16_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: 
test_mm128_maskz_cvtneps2bf16_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = bitcast i8 %U to <8 x i1> + %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> zeroinitializer, <4 x i1> %1) #4 + %3 = bitcast <8 x i16> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <2 x i64> @test_mm128_mask_cvtneps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 { +; X86-LABEL: test_mm128_mask_cvtneps2bf16_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm128_mask_cvtneps2bf16_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = bitcast i8 %U to <8 x i1> + %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> + %2 = bitcast <2 x i64> %C to <8 x i16> + %3 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> %2, <4 x i1> %1) #4 + %4 = bitcast <8 x i16> %3 to <2 x i64> + ret <2 x i64> %4 +} + +; Make sure we don't fold a select into the 128 bit form of cvtneps2bf16. It +; always writes zeros to bits 127:64 regardless of mask. 
+define <2 x i64> @test_mm128_cvtneps2bf16_128_select(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 { +; X86-LABEL: test_mm128_cvtneps2bf16_128_select: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9] +; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm128_cvtneps2bf16_128_select: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9] +; X64-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = bitcast i8 %U to <8 x i1> + %1 = bitcast <2 x i64> %C to <8 x i16> + %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #4 + %3 = select <8 x i1> %0, <8 x i16> %2, <8 x i16> %1 + %4 = bitcast <8 x i16> %3 to <2 x i64> + ret <2 x i64> %4 +} + +declare <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float>, <8 x i32>, <8 x i32>) #3 + +define <8 x float> @test_mm256_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 { +; CHECK-LABEL: test_mm256_dpbf16ps_256: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x52,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4 + ret <8 x float> %0 +} + +define <8 x float> @test_mm256_maskz_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B, i8 zeroext %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm256_maskz_dpbf16ps_256: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_maskz_dpbf16ps_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4 + %1 = bitcast i8 %U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + ret <8 x float> %2 +} +define <8 x float> @test_mm256_mask_dpbf16ps_256(i8 zeroext %U, <8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 { +; X86-LABEL: test_mm256_mask_dpbf16ps_256: +; X86: # %bb.0: # %entry +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm256_mask_dpbf16ps_256: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4 + %1 = bitcast i8 %U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %E + ret <8 x float> %2 +} + +declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <4 x i32>, <4 x i32>) #3 + +define <4 x float> @test_mm128_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 { +; CHECK-LABEL: 
test_mm128_dpbf16ps_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x52,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4 + ret <4 x float> %0 +} + +define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 { +; X86-LABEL: test_mm128_maskz_dpbf16ps_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm128_maskz_dpbf16ps_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4 + %1 = bitcast i4 %U to <4 x i1> + %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> zeroinitializer + ret <4 x float> %2 +} +define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 { +; X86-LABEL: test_mm128_mask_dpbf16ps_128: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm128_mask_dpbf16ps_128: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: 
[0x62,0xf2,0x76,0x09,0x52,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4 + %1 = bitcast i4 %U to <4 x i1> + %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> %E + ret <4 x float> %2 +} Index: test/MC/X86/avx512_bf16-encoding.s =================================================================== --- /dev/null +++ test/MC/X86/avx512_bf16-encoding.s @@ -0,0 +1,90 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4] + vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4] + vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4] + vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%ecx), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x31] + vcvtne2ps2bf16 (%ecx), %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%esp,%esi,8), %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%esp,%esi,8), %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 -64(%esp), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -64(%esp), %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 (%eax){1to16}, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30] + vcvtne2ps2bf16 (%eax){1to16}, %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 8128(%edx), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f] + vcvtne2ps2bf16 8128(%edx), %zmm5, %zmm6 + 
+// CHECK: vcvtne2ps2bf16 -8192(%edx), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80] + vcvtne2ps2bf16 -8192(%edx), %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 508(%edx){1to16}, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%edx){1to16}, %zmm5, %zmm6 + +// CHECK: vcvtne2ps2bf16 -512(%edx){1to16}, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%edx){1to16}, %zmm5, %zmm6 + +// CHECK: vcvtneps2bf16 %zmm5, %ymm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5] + vcvtneps2bf16 %zmm5, %ymm6 + +// CHECK: vcvtneps2bf16 268435456(%esp,%esi,8), %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16 268435456(%esp,%esi,8), %ymm6 {%k7} + +// CHECK: vcvtneps2bf16 (%ecx){1to16}, %ymm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31] + vcvtneps2bf16 (%ecx){1to16}, %ymm6 + +// CHECK: vcvtneps2bf16 8128(%ecx), %ymm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f] + vcvtneps2bf16 8128(%ecx), %ymm6 + +// CHECK: vcvtneps2bf16 -512(%edx){1to16}, %ymm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80] + vcvtneps2bf16 -512(%edx){1to16}, %ymm6 {%k7} {z} + +// CHECK: vdpbf16ps %zmm4, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4] + vdpbf16ps %zmm4, %zmm5, %zmm6 + +// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7} + +// CHECK: vdpbf16ps (%ecx){1to16}, %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31] + vdpbf16ps (%ecx){1to16}, %zmm5, %zmm6 + +// CHECK: vdpbf16ps 8128(%ecx), %zmm5, %zmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f] + vdpbf16ps 8128(%ecx), %zmm5, %zmm6 + +// CHECK: vdpbf16ps -512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80] + vdpbf16ps 
-512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z} + Index: test/MC/X86/avx512_bf16_vl-encoding.s =================================================================== --- /dev/null +++ test/MC/X86/avx512_bf16_vl-encoding.s @@ -0,0 +1,170 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4] + vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4] + vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%ecx), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31] + vcvtne2ps2bf16 (%ecx), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%esp,%esi,8), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -16(%esp), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -16(%esp), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 (%eax){1to4}, %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30] + vcvtne2ps2bf16 (%eax){1to4}, %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 2032(%edx), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f] + vcvtne2ps2bf16 2032(%edx), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -2048(%edx), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80] + vcvtne2ps2bf16 -2048(%edx), %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 508(%edx){1to4}, %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%edx){1to4}, 
%xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -512(%edx){1to4}, %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%edx){1to4}, %xmm5, %xmm6 {%k7} + +// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4] + vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4] + vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%ecx), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31] + vcvtne2ps2bf16 (%ecx), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%esp,%esi,8), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -32(%esp), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -32(%esp), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 (%eax){1to8}, %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30] + vcvtne2ps2bf16 (%eax){1to8}, %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 4064(%edx), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f] + vcvtne2ps2bf16 4064(%edx), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -4096(%edx), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80] + vcvtne2ps2bf16 -4096(%edx), %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 508(%edx){1to8}, %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%edx){1to8}, %ymm5, %ymm6 {%k7} + +// CHECK: vcvtne2ps2bf16 -512(%edx){1to8}, %ymm5, %ymm6 {%k7} +// CHECK: encoding: 
[0x62,0xf2,0x57,0x3f,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%edx){1to8}, %ymm5, %ymm6 {%k7} + +// CHECK: vcvtneps2bf16 %xmm5, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5] + vcvtneps2bf16 %xmm5, %xmm6 + +// CHECK: vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm6 {%k7} + +// CHECK: vcvtneps2bf16 (%ecx){1to4}, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31] + vcvtneps2bf16 (%ecx){1to4}, %xmm6 + +// CHECK: vcvtneps2bf16x 2032(%ecx), %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f] + vcvtneps2bf16x 2032(%ecx), %xmm6 + +// CHECK: vcvtneps2bf16 -512(%edx){1to4}, %xmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80] + vcvtneps2bf16 -512(%edx){1to4}, %xmm6 {%k7} {z} + +// CHECK: vcvtneps2bf16 %ymm5, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5] + vcvtneps2bf16 %ymm5, %xmm6 + +// CHECK: vcvtneps2bf16y 268435456(%esp,%esi,8), %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16y 268435456(%esp,%esi,8), %xmm6 {%k7} + +// CHECK: vcvtneps2bf16 (%ecx){1to8}, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31] + vcvtneps2bf16 (%ecx){1to8}, %xmm6 + +// CHECK: vcvtneps2bf16y 4064(%ecx), %xmm6 +// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f] + vcvtneps2bf16y 4064(%ecx), %xmm6 + +// CHECK: vcvtneps2bf16 -512(%edx){1to8}, %xmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80] + vcvtneps2bf16 -512(%edx){1to8}, %xmm6 {%k7} {z} + +// CHECK: vdpbf16ps %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4] + vdpbf16ps %ymm4, %ymm5, %ymm6 + +// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7} + +// CHECK: vdpbf16ps (%ecx){1to8}, %ymm5, %ymm6 +// 
CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31] + vdpbf16ps (%ecx){1to8}, %ymm5, %ymm6 + +// CHECK: vdpbf16ps 4064(%ecx), %ymm5, %ymm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f] + vdpbf16ps 4064(%ecx), %ymm5, %ymm6 + +// CHECK: vdpbf16ps -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80] + vdpbf16ps -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z} + +// CHECK: vdpbf16ps %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4] + vdpbf16ps %xmm4, %xmm5, %xmm6 + +// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7} +// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7} + +// CHECK: vdpbf16ps (%ecx){1to4}, %xmm5, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31] + vdpbf16ps (%ecx){1to4}, %xmm5, %xmm6 + +// CHECK: vdpbf16ps 2032(%ecx), %xmm5, %xmm6 +// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f] + vdpbf16ps 2032(%ecx), %xmm5, %xmm6 + +// CHECK: vdpbf16ps -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z} +// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80] + vdpbf16ps -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z} + Index: test/MC/X86/intel-syntax-avx512_bf16.s =================================================================== --- /dev/null +++ test/MC/X86/intel-syntax-avx512_bf16.s @@ -0,0 +1,90 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmm4 +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4] + vcvtne2ps2bf16 zmm6, zmm5, zmm4 + +// CHECK: vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4 +// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4] + vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4 + +// CHECK: vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4 +// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4] + vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4 + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx] +// CHECK: 
encoding: [0x62,0xf2,0x57,0x48,0x72,0x31] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291] +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4] +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30] + vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16} + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128] +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192] +// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80] + vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192] + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16} +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f] + vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16} + +// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16} +// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80] + vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16} + +// CHECK: vcvtneps2bf16 ymm6, zmm5 +// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5] + vcvtneps2bf16 ymm6, zmm5 + +// CHECK: vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16} 
+// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31] + vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16} + +// CHECK: vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f] + vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128] + +// CHECK: vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16} +// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80] + vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16} + +// CHECK: vdpbf16ps zmm6, zmm5, zmm4 +// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4] + vdpbf16ps zmm6, zmm5, zmm4 + +// CHECK: vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16} +// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31] + vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16} + +// CHECK: vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f] + vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128] + +// CHECK: vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16} +// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80] + vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16} + Index: test/MC/X86/intel-syntax-avx512_bf16_vl.s =================================================================== --- /dev/null +++ test/MC/X86/intel-syntax-avx512_bf16_vl.s @@ -0,0 +1,170 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4 +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4 + +// CHECK: vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4 +// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4] + vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4 + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx] +// CHECK: 
encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291] +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4] +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4} + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032] +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048] +// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048] + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4} + +// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4} +// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80] + vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4} + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4 +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4 + +// CHECK: vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4 +// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4] + vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4 + +// CHECK: 
vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8} + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096] +// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096] + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8} +// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8} + +// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8} +// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x80] + vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8} + +// CHECK: vcvtneps2bf16 xmm6, xmm5 +// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5] + vcvtneps2bf16 xmm6, xmm5 + +// CHECK: vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: 
[0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4} +// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31] + vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4} + +// CHECK: vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f] + vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4} +// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80] + vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4} + +// CHECK: vcvtneps2bf16 xmm6, ymm5 +// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5] + vcvtneps2bf16 xmm6, ymm5 + +// CHECK: vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10] + vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8} +// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31] + vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8} + +// CHECK: vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f] + vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8} +// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80] + vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8} + +// CHECK: vdpbf16ps ymm6, ymm5, ymm4 +// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4] + vdpbf16ps ymm6, ymm5, ymm4 + +// CHECK: vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8} +// CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31] + vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8} + +// CHECK: vdpbf16ps ymm6, ymm5, 
ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f] + vdpbf16ps ymm6, ymm5, ymmword ptr [ecx + 4064] + +// CHECK: vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8} +// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80] + vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8} + +// CHECK: vdpbf16ps xmm6, xmm5, xmm4 +// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4] + vdpbf16ps xmm6, xmm5, xmm4 + +// CHECK: vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4} +// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31] + vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4} + +// CHECK: vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f] + vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032] + +// CHECK: vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4} +// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80] + vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4} + Index: test/MC/X86/intel-syntax-x86-64-avx512_bf16.s =================================================================== --- /dev/null +++ test/MC/X86/intel-syntax-x86-64-avx512_bf16.s @@ -0,0 +1,90 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmm28 +// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4] + vcvtne2ps2bf16 zmm30, zmm29, zmm28 + +// CHECK: vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28 +// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4] + vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28 + +// CHECK: vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28 +// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4] + vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28 + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, 
zmmword ptr [rcx] +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rcx] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291] +// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4] +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16} +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31] + vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16} + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128] +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192] +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80] + vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192] + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16} +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f] + vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16} + +// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16} +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80] + vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16} + +// CHECK: vcvtneps2bf16 ymm30, zmm29 +// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5] + vcvtneps2bf16 ymm30, zmm29 + +// CHECK: vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 
+ 268435456] + +// CHECK: vcvtneps2bf16 ymm30, dword ptr [r9]{1to16} +// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31] + vcvtneps2bf16 ymm30, dword ptr [r9]{1to16} + +// CHECK: vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f] + vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128] + +// CHECK: vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16} +// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80] + vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16} + +// CHECK: vdpbf16ps zmm30, zmm29, zmm28 +// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4] + vdpbf16ps zmm30, zmm29, zmm28 + +// CHECK: vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16} +// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31] + vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16} + +// CHECK: vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f] + vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128] + +// CHECK: vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16} +// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80] + vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16} + Index: test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s =================================================================== --- /dev/null +++ test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmm28 +// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4] + vcvtne2ps2bf16 xmm30, xmm29, xmm28 + +// CHECK: vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28 +// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4] + 
vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28 + +// CHECK: vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28 +// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4] + vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28 + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx] +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291] +// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4] +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4} +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31] + vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4} + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032] +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80] + vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048] + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4} +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f] + vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4} + +// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4} +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80] + vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4} + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymm28 +// CHECK: encoding: 
[0x62,0x02,0x17,0x20,0x72,0xf4] + vcvtne2ps2bf16 ymm30, ymm29, ymm28 + +// CHECK: vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28 +// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4] + vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28 + +// CHECK: vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28 +// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4] + vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28 + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx] +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291] +// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4] +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8} +// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31] + vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8} + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064] +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80] + vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096] + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8} +// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f] + vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8} + +// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8} +// CHECK: encoding: 
[0x62,0x62,0x17,0x30,0x72,0x72,0x80] + vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8} + +// CHECK: vcvtneps2bf16 xmm30, xmm29 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5] + vcvtneps2bf16 xmm30, xmm29 + +// CHECK: vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to4} +// CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31] + vcvtneps2bf16 xmm30, dword ptr [r9]{1to4} + +// CHECK: vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f] + vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032] + +// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4} +// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80] + vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4} + +// CHECK: vcvtneps2bf16 xmm30, ymm29 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5] + vcvtneps2bf16 xmm30, ymm29 + +// CHECK: vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to8} +// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31] + vcvtneps2bf16 xmm30, dword ptr [r9]{1to8} + +// CHECK: vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f] + vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064] + +// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8} +// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80] + vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8} + +// CHECK: vdpbf16ps ymm30, ymm29, ymm28 +// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4] + vdpbf16ps ymm30, ymm29, ymm28 + +// CHECK: vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 
268435456] +// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8} +// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31] + vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8} + +// CHECK: vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f] + vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064] + +// CHECK: vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8} +// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80] + vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8} + +// CHECK: vdpbf16ps xmm30, xmm29, xmm28 +// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4] + vdpbf16ps xmm30, xmm29, xmm28 + +// CHECK: vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4} +// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31] + vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4} + +// CHECK: vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f] + vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032] + +// CHECK: vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4} +// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80] + vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4} + Index: test/MC/X86/x86-64-avx512_bf16-encoding.s =================================================================== --- /dev/null +++ test/MC/X86/x86-64-avx512_bf16-encoding.s @@ -0,0 +1,90 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4] + vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 + +// CHECK: 
vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4] + vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} + +// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4] + vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%rcx), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31] + vcvtne2ps2bf16 (%rcx), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%rax,%r14,8), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%rax,%r14,8), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 -64(%rsp), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -64(%rsp), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 (%rcx){1to16}, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31] + vcvtne2ps2bf16 (%rcx){1to16}, %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 8128(%rdx), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f] + vcvtne2ps2bf16 8128(%rdx), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 -8192(%rdx), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80] + vcvtne2ps2bf16 -8192(%rdx), %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 508(%rdx){1to16}, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%rdx){1to16}, %zmm29, %zmm30 + +// CHECK: vcvtne2ps2bf16 -512(%rdx){1to16}, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%rdx){1to16}, %zmm29, %zmm30 + +// CHECK: vcvtneps2bf16 %zmm29, %ymm30 +// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5] + vcvtneps2bf16 %zmm29, %ymm30 + +// CHECK: vcvtneps2bf16 268435456(%rbp,%r14,8), %ymm30 
{%k7} +// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16 268435456(%rbp,%r14,8), %ymm30 {%k7} + +// CHECK: vcvtneps2bf16 (%r9){1to16}, %ymm30 +// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31] + vcvtneps2bf16 (%r9){1to16}, %ymm30 + +// CHECK: vcvtneps2bf16 8128(%rcx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f] + vcvtneps2bf16 8128(%rcx), %ymm30 + +// CHECK: vcvtneps2bf16 -512(%rdx){1to16}, %ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80] + vcvtneps2bf16 -512(%rdx){1to16}, %ymm30 {%k7} {z} + +// CHECK: vdpbf16ps %zmm28, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4] + vdpbf16ps %zmm28, %zmm29, %zmm30 + +// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7} + +// CHECK: vdpbf16ps (%r9){1to16}, %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31] + vdpbf16ps (%r9){1to16}, %zmm29, %zmm30 + +// CHECK: vdpbf16ps 8128(%rcx), %zmm29, %zmm30 +// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f] + vdpbf16ps 8128(%rcx), %zmm29, %zmm30 + +// CHECK: vdpbf16ps -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80] + vdpbf16ps -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z} + Index: test/MC/X86/x86-64-avx512_bf16_vl-encoding.s =================================================================== --- /dev/null +++ test/MC/X86/x86-64-avx512_bf16_vl-encoding.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s + +// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4] + vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4] + vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} + +// CHECK: 
vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4] + vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%rcx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31] + vcvtne2ps2bf16 (%rcx), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%rax,%r14,8), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%rax,%r14,8), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 -16(%rsp), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -16(%rsp), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 (%rcx){1to4}, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31] + vcvtne2ps2bf16 (%rcx){1to4}, %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 2032(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f] + vcvtne2ps2bf16 2032(%rdx), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 -2048(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80] + vcvtne2ps2bf16 -2048(%rdx), %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 508(%rdx){1to4}, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%rdx){1to4}, %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 -512(%rdx){1to4}, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%rdx){1to4}, %xmm29, %xmm30 + +// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x02,0x17,0x20,0x72,0xf4] + vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4] + vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} + +// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, 
%ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4] + vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} {z} + +// CHECK: vcvtne2ps2bf16 (%rcx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31] + vcvtne2ps2bf16 (%rcx), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtne2ps2bf16 291(%rax,%r14,8), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10] + vcvtne2ps2bf16 268435456(%rax,%r14,8), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 -32(%rsp), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x74,0x24,0xff] + vcvtne2ps2bf16 -32(%rsp), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 (%rcx){1to8}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31] + vcvtne2ps2bf16 (%rcx){1to8}, %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 4064(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f] + vcvtne2ps2bf16 4064(%rdx), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 -4096(%rdx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80] + vcvtne2ps2bf16 -4096(%rdx), %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 508(%rdx){1to8}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f] + vcvtne2ps2bf16 508(%rdx){1to8}, %ymm29, %ymm30 + +// CHECK: vcvtne2ps2bf16 -512(%rdx){1to8}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x80] + vcvtne2ps2bf16 -512(%rdx){1to8}, %ymm29, %ymm30 + +// CHECK: vcvtneps2bf16 %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5] + vcvtneps2bf16 %xmm29, %xmm30 + +// CHECK: vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm30 {%k7} + +// CHECK: vcvtneps2bf16 (%r9){1to4}, %xmm30 +// 
CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31] + vcvtneps2bf16 (%r9){1to4}, %xmm30 + +// CHECK: vcvtneps2bf16x 2032(%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f] + vcvtneps2bf16x 2032(%rcx), %xmm30 + +// CHECK: vcvtneps2bf16 -512(%rdx){1to4}, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80] + vcvtneps2bf16 -512(%rdx){1to4}, %xmm30 {%k7} {z} + +// CHECK: vcvtneps2bf16 %ymm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5] + vcvtneps2bf16 %ymm29, %xmm30 + +// CHECK: vcvtneps2bf16y 268435456(%rbp,%r14,8), %xmm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcvtneps2bf16y 268435456(%rbp,%r14,8), %xmm30 {%k7} + +// CHECK: vcvtneps2bf16 (%r9){1to8}, %xmm30 +// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31] + vcvtneps2bf16 (%r9){1to8}, %xmm30 + +// CHECK: vcvtneps2bf16y 4064(%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f] + vcvtneps2bf16y 4064(%rcx), %xmm30 + +// CHECK: vcvtneps2bf16 -512(%rdx){1to8}, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80] + vcvtneps2bf16 -512(%rdx){1to8}, %xmm30 {%k7} {z} + +// CHECK: vdpbf16ps %ymm28, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4] + vdpbf16ps %ymm28, %ymm29, %ymm30 + +// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7} + +// CHECK: vdpbf16ps (%r9){1to8}, %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31] + vdpbf16ps (%r9){1to8}, %ymm29, %ymm30 + +// CHECK: vdpbf16ps 4064(%rcx), %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f] + vdpbf16ps 4064(%rcx), %ymm29, %ymm30 + +// CHECK: vdpbf16ps -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80] + vdpbf16ps -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z} + +// CHECK: vdpbf16ps 
%xmm28, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4] + vdpbf16ps %xmm28, %xmm29, %xmm30 + +// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdpbf16ps 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7} + +// CHECK: vdpbf16ps (%r9){1to4}, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31] + vdpbf16ps (%r9){1to4}, %xmm29, %xmm30 + +// CHECK: vdpbf16ps 2032(%rcx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f] + vdpbf16ps 2032(%rcx), %xmm29, %xmm30 + +// CHECK: vdpbf16ps -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80] + vdpbf16ps -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z} +