// Patch summary (descriptive preamble only; the diff text below is unchanged).
//
// llvm/include/llvm/IR/Intrinsics.td
//   Adds the llvm_v256i1_ty (256 x i1) type definition between the existing
//   128 x i1 and 512 x i1 definitions.
//
// llvm/include/llvm/IR/IntrinsicsPowerPC.td
//   Adds two multiclasses whose members all return llvm_v512i1_ty and are
//   marked IntrNoMem:
//     - PowerPC_MMA_ACC_Intrinsic: defines NAME taking `args`, plus pp/pn/np/nn
//       members whose operand list is llvm_v512i1_ty prepended to `args`.
//     - PowerPC_MMA_ACC_PP_Intrinsic: defines only NAME and the pp member.
//   Adds, under TargetPrefix "ppc", the int_ppc_mma_* definitions:
//     - assemble_acc / disassemble_acc (four v16i8 <-> one v512i1) and
//       assemble_pair / disassemble_pair (two v16i8 <-> one v256i1);
//     - xxmtacc / xxmfacc (v512i1 -> v512i1) and xxsetaccz (no operands);
//     - xvi4ger8, xvi8ger4, xvi16ger2s, xvi16ger2 and their pm* forms via the
//       PP-only multiclass;
//     - xvf16ger2, xvf32ger, xvf64ger, xvbf16ger2 and their pm* forms via the
//       full multiclass. xvf64ger/pmxvf64ger take llvm_v256i1_ty as their first
//       operand; pmxvf32ger and pmxvf64ger take two i32 operands while the
//       other pm* forms take three;
//     - xvcvspbf16 / xvcvbf16sp (v16i8 -> v16i8), bound to the GCC builtins
//       "__builtin_xvcvspbf16" / "__builtin_xvcvbf16sp";
//     - xvi8ger4spp / pmxvi8ger4spp as standalone defs with a leading
//       llvm_v512i1_ty operand.
//
// llvm/lib/IR/Function.cpp and llvm/utils/TableGen/IntrinsicEmitter.cpp
//   Both enums gain IIT_V256 = 49 after IIT_BF16 = 48. The decoder adds a
//   case IIT_V256 that pushes IITDescriptor::getVector(256, IsScalableVector)
//   and then decodes the element type; the emitter's vector-width switch adds
//   case 256 pushing IIT_V256.
//   NOTE(review): the two enums are edited identically here, presumably
//   because they must stay in sync - confirm when adding further IIT codes.
//
// NOTE(review): this copy of the patch appears to have had its line breaks
// collapsed and angle-bracketed arguments dropped (e.g. "LLVMType;",
// "multiclass PowerPC_MMA_ACC_Intrinsic args>", "ArrayRef Infos"). Verify
// against the upstream patch before attempting to apply it.
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -251,6 +251,7 @@ def llvm_v32i1_ty : LLVMType; // 32 x i1 def llvm_v64i1_ty : LLVMType; // 64 x i1 def llvm_v128i1_ty : LLVMType; // 128 x i1 +def llvm_v256i1_ty : LLVMType; // 256 x i1 def llvm_v512i1_ty : LLVMType; // 512 x i1 def llvm_v1024i1_ty : LLVMType; //1024 x i1 diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -194,6 +194,28 @@ [IntrNoMem]>; //===----------------------------------------------------------------------===// +// PowerPC MMA Intrinsic Multi Class Definitions. +// + +multiclass PowerPC_MMA_ACC_Intrinsic args> { + def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>; + def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def pn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def np : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def nn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; +} + +multiclass PowerPC_MMA_ACC_PP_Intrinsic args> { + def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>; + def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// // PowerPC Altivec Intrinsic Definitions. let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
@@ -1183,3 +1205,93 @@ Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; } + +let TargetPrefix = "ppc" in { + def int_ppc_mma_assemble_acc : + Intrinsic<[llvm_v512i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + + def int_ppc_mma_disassemble_acc : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_assemble_pair : + Intrinsic<[llvm_v256i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + + def int_ppc_mma_disassemble_pair : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v256i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmtacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmfacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxsetaccz : + Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>; + + // MMA Reduced-Precision: Outer Product Intrinsic Definitions. + defm int_ppc_mma_xvi4ger8 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi4ger8 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvi8ger4 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi8ger4 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvi16ger2s : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi16ger2s : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + defm int_ppc_mma_xvf32ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm 
int_ppc_mma_pmxvf32ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty]>; + defm int_ppc_mma_xvf64ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvf64ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty]>; + + // MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions. + defm int_ppc_mma_xvbf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvbf16ger2 : + PowerPC_MMA_ACC_Intrinsic< + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; + + // Conversion intrinsics for bfloat16 format. + def int_ppc_mma_xvcvspbf16 : GCCBuiltin<"__builtin_xvcvspbf16">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_mma_xvcvbf16sp : GCCBuiltin<"__builtin_xvcvbf16sp">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + + // MMA Reduced-Precision: Missing Integer-based Outer Product Operations. 
+ defm int_ppc_mma_xvi16ger2 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi16ger2 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + def int_ppc_mma_xvi8ger4spp : + Intrinsic<[llvm_v512i1_ty], + [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_mma_pmxvi8ger4spp : + Intrinsic<[llvm_v512i1_ty], + [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -748,7 +748,8 @@ IIT_SUBDIVIDE4_ARG = 45, IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, - IIT_BF16 = 48 + IIT_BF16 = 48, + IIT_V256 = 49 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -842,6 +843,10 @@ OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector)); DecodeIITType(NextElt, Infos, Info, OutputTable); return; + case IIT_V256: + OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector)); + DecodeIITType(NextElt, Infos, Info, OutputTable); + return; case IIT_V512: OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector)); DecodeIITType(NextElt, Infos, Info, OutputTable); diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -246,7 +246,8 @@ IIT_SUBDIVIDE4_ARG = 45, IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, - IIT_BF16 = 48 + IIT_BF16 = 48, + IIT_V256 = 49 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -384,6 +385,7 @@ case 32: Sig.push_back(IIT_V32); break; case 64: Sig.push_back(IIT_V64); break; case 128: Sig.push_back(IIT_V128); break; + case 256: Sig.push_back(IIT_V256); break; case 512: Sig.push_back(IIT_V512); break; case 1024: Sig.push_back(IIT_V1024); break; }