diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1321,6 +1321,8 @@ } } else if (VT == MVT::f16) { Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; + } else if (VT == MVT::bf16) { + Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -623,6 +623,7 @@ setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::f16, Custom); + setOperationAction(ISD::BITCAST, MVT::bf16, Custom); // Indexed loads and stores are supported. for (unsigned im = (unsigned)ISD::PRE_INC; @@ -634,6 +635,7 @@ setIndexedLoadAction(im, MVT::f64, Legal); setIndexedLoadAction(im, MVT::f32, Legal); setIndexedLoadAction(im, MVT::f16, Legal); + setIndexedLoadAction(im, MVT::bf16, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); @@ -641,6 +643,7 @@ setIndexedStoreAction(im, MVT::f64, Legal); setIndexedStoreAction(im, MVT::f32, Legal); setIndexedStoreAction(im, MVT::f16, Legal); + setIndexedStoreAction(im, MVT::bf16, Legal); } // Trap. @@ -2812,7 +2815,8 @@ } static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) { - if (Op.getValueType() != MVT::f16) + EVT OpVT = Op.getValueType(); + if (OpVT != MVT::f16 && OpVT != MVT::bf16) return SDValue(); assert(Op.getOperand(0).getValueType() == MVT::i16); @@ -2821,7 +2825,7 @@ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); return SDValue( - DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op, DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), 0); } @@ -3561,9 +3565,7 @@ RC = &AArch64::GPR32RegClass; else if (RegVT == MVT::i64) RC = &AArch64::GPR64RegClass; - else if (RegVT == MVT::f16) - RC = &AArch64::FPR16RegClass; - else if (RegVT == MVT::bf16) + else if (RegVT == MVT::f16 || RegVT == MVT::bf16) RC = &AArch64::FPR16RegClass; else if (RegVT == MVT::f32) RC = &AArch64::FPR32RegClass; @@ -5258,8 +5260,8 @@ Cmp); } - assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 || - LHS.getValueType() == MVT::f64); + assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 || + LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. @@ -7276,7 +7278,8 @@ return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS); // vrev <4 x i16> -> REV32 if (VT.getVectorElementType() == MVT::i16 || - VT.getVectorElementType() == MVT::f16) + VT.getVectorElementType() == MVT::f16 || + VT.getVectorElementType() == MVT::bf16) return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS); // vrev <4 x i8> -> REV16 assert(VT.getVectorElementType() == MVT::i8); @@ -7289,7 +7292,7 @@ unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; - else if (EltTy == MVT::i16 || EltTy == MVT::f16) + else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; @@ -7396,7 +7399,7 @@ static unsigned getDUPLANEOp(EVT EltType) { if (EltType == MVT::i8) return AArch64ISD::DUPLANE8; - if (EltType == MVT::i16 || EltType == MVT::f16) + if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16) return AArch64ISD::DUPLANE16; if (EltType == MVT::i32 || EltType == MVT::f32) return AArch64ISD::DUPLANE32; @@ -7632,6 +7635,7 @@ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64); break; case MVT::f16: + case MVT::bf16: case MVT::f32: case MVT::f64: // Fine as is @@ -8315,8 +8319,8 @@ if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; EVT EltTy = VT.getVectorElementType(); - assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) && - "Unsupported floating-point vector type"); + assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 || + EltTy == MVT::f64) && "Unsupported floating-point vector type"); LLVM_DEBUG( dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int " "BITCASTS, and try again\n"); @@ -8435,11 +8439,12 @@ // Insertion/extraction are legal for V128 types. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 || - VT == MVT::v8f16) + VT == MVT::v8f16 || VT == MVT::v8bf16) return Op; if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16) + VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 && + VT != MVT::v4bf16) return SDValue(); // For V64 types, we perform insertion by expanding the value @@ -8469,11 +8474,12 @@ // Insertion/extraction are legal for V128 types. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 || - VT == MVT::v8f16) + VT == MVT::v8f16 || VT == MVT::v8bf16) return Op; if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16) + VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 && + VT != MVT::v4bf16) return SDValue(); // For V64 types, we perform extraction by expanding the value @@ -13591,7 +13597,8 @@ SDLoc DL(N); SDValue Op = N->getOperand(0); - if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16) + if (N->getValueType(0) != MVT::i16 || + (Op.getValueType() != MVT::f16 && Op.getValueType() != MVT::bf16)) return; Op = SDValue( diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2323,6 +2323,10 @@ [(set (f128 FPR128Op:$Rt), (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; +// bf16 load pattern +def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; + // For regular load, we do not have any alignment requirement. // Thus, it is safe to directly map the vector loads with interesting // addressing modes. @@ -2968,6 +2972,11 @@ (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; +// bf16 store pattern +def : Pat<(store (bf16 FPR16Op:$Rt), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>; + let AddedComplexity = 10 in { // Match all store 64 bits width whose type is compatible with FPR64 @@ -4770,6 +4779,7 @@ defm : ExtPat; defm : ExtPat; defm : ExtPat; +defm : ExtPat; defm : ExtPat; defm : ExtPat; defm : ExtPat; @@ -4891,16 +4901,29 @@ (v4f16 (DUPv4i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>; +def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))), + (v4bf16 (DUPv4i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), (v8f16 (DUPv8i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>; +def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))), + (v8bf16 (DUPv8i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; +def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), + (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; +def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), + (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; + def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), @@ -5016,6 +5039,11 @@ def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), + (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), + (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 FPR32:$Rn), ssub))>; @@ -5032,6 +5060,11 @@ def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), + (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), + (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), @@ -5057,6 +5090,23 @@ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), (i64 0))>; +def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), + (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))), + (EXTRACT_SUBREG + (INSvi16lane + (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexS:$imm, + (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0)), + dsub)>; + +def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), + (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))), + (INSvi16lane + V128:$Rn, VectorIndexH:$imm, + (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0))>; + def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), (EXTRACT_SUBREG @@ -5138,6 +5188,7 @@ } defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; @@ -5151,6 +5202,9 @@ (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; def : Pat<(vector_extract (v8f16 V128:$Rn), 0), (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; +def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), + (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; + def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; @@ -5158,6 +5212,8 @@ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), + (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be @@ -5173,6 +5229,7 @@ def : ConcatPat; def : ConcatPat; def : ConcatPat; +def : ConcatPat; def : ConcatPat; // If the high lanes are undef, though, we can just ignore them: @@ -6614,6 +6671,7 @@ def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -6621,12 +6679,14 @@ def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -6634,6 +6694,7 @@ def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -6650,6 +6711,7 @@ def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; @@ -6658,6 +6720,7 @@ def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; @@ -6666,6 +6729,7 @@ def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; @@ -6674,6 +6738,7 @@ def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; @@ -6685,6 +6750,7 @@ def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; @@ -6693,6 +6759,7 @@ def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; let Predicates = [IsLE] in { @@ -6700,6 +6767,7 @@ def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), @@ -6710,6 +6778,8 @@ (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), @@ -6724,6 +6794,8 @@ (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; @@ -6735,6 +6807,8 @@ (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; } @@ -6764,6 +6838,7 @@ def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; } let Predicates = [IsBE] in { @@ -6775,6 +6850,8 @@ (v1i64 (REV64v8i8 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 (REV64v2i32 FPR64:$src))>; } @@ -6788,6 +6865,7 @@ def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), @@ -6802,6 +6880,8 @@ (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -6828,6 +6908,7 @@ (v4i16 (REV64v4i16 FPR64:$src))>; } def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; @@ -6836,6 +6917,13 @@ def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; + +def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), @@ -6850,8 +6938,22 @@ (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; + +def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), + (v4bf16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), + (v4bf16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), + (v4bf16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), + (v4bf16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), + (v4bf16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), + (v4bf16 (REV64v4i16 FPR64:$src))>; } def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; @@ -6861,6 +6963,7 @@ def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), @@ -6877,6 +6980,8 @@ (v8i8 (REV64v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 (REV16v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; } let Predicates = [IsLE] in { @@ -6885,6 +6990,7 @@ def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), @@ -6897,6 +7003,8 @@ (f64 (REV64v8i8 FPR64:$src))>; def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; } def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; @@ -6907,6 +7015,7 @@ def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), @@ -6919,6 +7028,8 @@ (v1f64 (REV64v2i32 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; } def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; @@ -6930,6 +7041,7 @@ def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), @@ -6944,6 +7056,8 @@ (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; @@ -6954,6 +7068,7 @@ def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; } let Predicates = [IsBE] in { @@ -6968,6 +7083,9 @@ def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), @@ -6983,6 +7101,7 @@ def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; } @@ -6996,6 +7115,8 @@ (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 (REV64v16i8 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), @@ -7007,6 +7128,7 @@ def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; @@ -7019,6 +7141,8 @@ (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 (REV32v16i8 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), @@ -7035,6 +7159,7 @@ def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), @@ -7050,6 +7175,8 @@ (v2i64 (REV64v4i32 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; } def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; @@ -7060,6 +7187,7 @@ def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), @@ -7076,6 +7204,8 @@ (v4i32 (REV64v4i32 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; } def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; @@ -7104,6 +7234,7 @@ (v8i16 (REV32v8i16 FPR128:$src))>; } def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; @@ -7112,6 +7243,13 @@ def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; + +def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), @@ -7128,8 +7266,24 @@ (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; + +def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), + (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), + (i32 8)))>; +def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), + (v8bf16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), + (v8bf16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), + (v8bf16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), + (v8bf16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), + (v8bf16 (REV32v8i16 FPR128:$src))>; } def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; @@ -7139,6 +7293,7 @@ def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), @@ -7157,6 +7312,8 @@ (v16i8 (REV32v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; } def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), @@ -7167,6 +7324,8 @@ (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), @@ -7198,6 +7357,8 @@ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; }