diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -260,6 +260,7 @@ def llvm_v32i1_ty : LLVMType; // 32 x i1 def llvm_v64i1_ty : LLVMType; // 64 x i1 def llvm_v128i1_ty : LLVMType; // 128 x i1 +def llvm_v256i1_ty : LLVMType; // 256 x i1 def llvm_v512i1_ty : LLVMType; // 512 x i1 def llvm_v1024i1_ty : LLVMType; //1024 x i1 diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1364,3 +1364,32 @@ Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; } + +let TargetPrefix = "ppc" in { + def int_ppc_mma_assemble_acc : + Intrinsic<[llvm_v512i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + + def int_ppc_mma_disassemble_acc : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_assemble_pair : + Intrinsic<[llvm_v256i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + + def int_ppc_mma_disassemble_pair : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v256i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmtacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmfacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxsetaccz : + Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>; +} + diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -834,7 +834,8 @@ IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, IIT_BF16 = 48, - IIT_STRUCT9 = 49 + IIT_STRUCT9 = 49, + IIT_V256 = 50 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -928,6 +929,10 @@ OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector)); DecodeIITType(NextElt, Infos, Info, OutputTable); return; + case IIT_V256: + OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector)); + DecodeIITType(NextElt, Infos, Info, OutputTable); + return; case IIT_V512: OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector)); DecodeIITType(NextElt, Infos, Info, OutputTable); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10416,11 +10416,32 @@ SDLoc dl(Op); - if (IntrinsicID == Intrinsic::thread_pointer) { + switch (IntrinsicID) { + case Intrinsic::thread_pointer: // Reads the thread pointer register, used for __builtin_thread_pointer. if (Subtarget.isPPC64()) return DAG.getRegister(PPC::X13, MVT::i64); return DAG.getRegister(PPC::R2, MVT::i32); + + case Intrinsic::ppc_mma_disassemble_acc: + case Intrinsic::ppc_mma_disassemble_pair: { + int NumVecs = 2; + SDValue WideVec = Op.getOperand(1); + if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { + NumVecs = 4; + WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); + } + SmallVector RetOps; + for (int VecNo = 0; VecNo < NumVecs; VecNo++) { + SDValue Extract = DAG.getNode( + PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, + DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo + : VecNo, + dl, MVT::i64)); + RetOps.push_back(Extract); + } + return DAG.getMergeValues(RetOps, dl); + } } // If this is a lowered altivec predicate compare, CompareOpc is set to the diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -672,6 +672,7 @@ case PPC::V_SETALLONES: case PPC::CRSET: case PPC::CRUNSET: + case PPC::XXSETACCZ: return true; } return false; @@ -1340,6 +1341,22 @@ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || PPC::VSSRCRegClass.contains(DestReg, SrcReg)) Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf; + else if (Subtarget.pairedVectorMemops() && + PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) { + if (SrcReg > PPC::VSRp15) + SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2; + else + SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2; + if (DestReg > PPC::VSRp15) + DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2; + else + DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2; + BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg). + addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1). + addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc)); + return; + } else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1264,12 +1264,14 @@ let Predicates = [MMA] in { def XXMFACC : XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", - IIC_VecGeneral, []>, RegConstraint<"$ASo = $AS">, - NoEncode<"$ASo">; + IIC_VecGeneral, + [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, + RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; def XXMTACC : XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", - IIC_VecGeneral, []>, RegConstraint<"$ATi = $AT">, - NoEncode<"$ATi">; + IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), "#KILL_PAIR", []>, RegConstraint<"$XTp = $XSp">; @@ -1280,8 +1282,8 @@ // register and this copy is more expensive than calling the intrinsic again. let isAsCheapAsAMove = 1, isReMaterializable = 1 in { def XXSETACCZ : - XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", - IIC_VecGeneral, []>; + XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; } def XVI8GER4SPP : XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), @@ -1369,6 +1371,11 @@ (XXMTACC Concats.VecsToVecQuad)>; def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)), Concats.VecsToVecPair0>; + def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, + v16i8:$vs3, v16i8:$vs2)), + (XXMTACC Concats.VecsToVecQuad)>; + def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)), + Concats.VecsToVecPair0>; def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))), Extracts.Vec0>; diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -0,0 +1,250 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +; assemble_acc +declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: ass_acc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: xxlor vs0, v2, v2 +; CHECK-NEXT: xxlor vs1, v3, v3 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs3, v3, v3 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: ass_acc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-NEXT: xxlor vs1, v3, v3 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs3, v3, v3 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: ass_acc: +; CHECK-O0: # %bb.0: # %entry +; CHECK-BE-O0-LABEL: ass_acc: +; CHECK-BE-O0: # %bb.0: # %entry +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %0, <512 x i1>* %ptr, align 64 + ret void +} + +; assemble_pair +declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>) +define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: ass_pair: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: ass_pair: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: stxv v2, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc) + store <256 x i1> %0, <256 x i1>* %ptr, align 32 + ret void +} + +; xxmtacc +declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>) +define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: int_xxmtacc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: xxlor vs0, v2, v2 +; CHECK-NEXT: xxlor vs1, v3, v3 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs3, v3, v3 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: int_xxmtacc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-NEXT: xxlor vs1, v3, v3 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs3, v3, v3 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +entry: +; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is +; generated from the call to xxmtacc then one xxmfacc is generated for the store + %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) + %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0) + store <512 x i1> %1, <512 x i1>* %ptr, align 64 + ret void +} + +; xxmfacc +declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>) +define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: int_xxmfacc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: xxlor vs0, v2, v2 +; CHECK-NEXT: xxlor vs1, v3, v3 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs3, v3, v3 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: int_xxmfacc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-NEXT: xxlor vs1, v3, v3 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs3, v3, v3 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +entry: +; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is +; generated from the call to xxmfacc then one xxmfacc is generated for the store + %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) + %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0) + store <512 x i1> %1, <512 x i1>* %ptr, align 64 + ret void +} + +; xxsetaccz +declare <512 x i1> @llvm.ppc.mma.xxsetaccz() +define void @int_xxsetaccz(<512 x i1>* %ptr) { +; CHECK-LABEL: int_xxsetaccz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: int_xxsetaccz: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() + store <512 x i1> %0, <512 x i1>* %ptr, align 64 + ret void +} + +; disassemble_acc +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) +define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) { +; CHECK-LABEL: disass_acc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: stxv vs2, 0(r4) +; CHECK-NEXT: stxv vs1, 0(r5) +; CHECK-NEXT: stxv vs0, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: disass_acc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 0(r4) +; CHECK-BE-NEXT: stxv vs2, 0(r5) +; CHECK-BE-NEXT: stxv vs3, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() + %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 + %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 + %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 + %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3 + store <16 x i8> %2, <16 x i8>* %ptr1, align 16 + store <16 x i8> %3, <16 x i8>* %ptr2, align 16 + store <16 x i8> %4, <16 x i8>* %ptr3, align 16 + store <16 x i8> %5, <16 x i8>* %ptr4, align 16 + ret void +} + +; disassemble_pair +declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>) +define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) { +; CHECK-LABEL: disass_pair: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv vs1, 0(r3) +; CHECK-NEXT: lxv vs0, 16(r3) +; CHECK-NEXT: stxv vs1, 0(r4) +; CHECK-NEXT: stxv vs0, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: disass_pair: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r4) +; CHECK-BE-NEXT: stxv vs1, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32 + %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0) + %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 + %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 + store <16 x i8> %2, <16 x i8>* %ptr2, align 16 + store <16 x i8> %3, <16 x i8>* %ptr3, align 16 + ret void +} + diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -247,7 +247,8 @@ IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, IIT_BF16 = 48, - IIT_STRUCT9 = 49 + IIT_STRUCT9 = 49, + IIT_V256 = 50 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -385,6 +386,7 @@ case 32: Sig.push_back(IIT_V32); break; case 64: Sig.push_back(IIT_V64); break; case 128: Sig.push_back(IIT_V128); break; + case 256: Sig.push_back(IIT_V256); break; case 512: Sig.push_back(IIT_V512); break; case 1024: Sig.push_back(IIT_V1024); break; }