diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -113,20 +113,20 @@ cl::init(false)); static cl::opt -EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, - cl::desc("Enable AArch64 logical imm instruction " - "optimization"), - cl::init(true)); + EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); // Temporary option added for the purpose of testing functionality added // to DAGCombiner.cpp in D92230. It is expected that this can be removed // in future when both implementations will be based off MGATHER rather // than the GLD1 nodes added for the SVE gather load intrinsics. static cl::opt -EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, - cl::desc("Combine extends of AArch64 masked " - "gather intrinsics"), - cl::init(true)); + EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, + cl::desc("Combine extends of AArch64 masked " + "gather intrinsics"), + cl::init(true)); /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -604,21 +604,45 @@ } if (!Subtarget->hasFullFP16()) { - for (auto Op : - {ISD::SETCC, ISD::SELECT_CC, - ISD::BR_CC, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FDIV, ISD::FMA, - ISD::FNEG, ISD::FABS, ISD::FCEIL, - ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, - ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, - ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, - ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, - ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT, - ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, - ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, - ISD::STRICT_FMAXIMUM}) + for (auto Op : {ISD::SETCC, + ISD::SELECT_CC, + ISD::BR_CC, + ISD::FADD, + ISD::FSUB, + ISD::FMUL, + ISD::FDIV, + ISD::FMA, + ISD::FNEG, + ISD::FABS, + ISD::FCEIL, + ISD::FSQRT, + ISD::FFLOOR, + ISD::FNEARBYINT, + ISD::FRINT, + ISD::FROUND, + ISD::FROUNDEVEN, + ISD::FTRUNC, + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FMINIMUM, + ISD::FMAXIMUM, + ISD::STRICT_FADD, + ISD::STRICT_FSUB, + ISD::STRICT_FMUL, + ISD::STRICT_FDIV, + ISD::STRICT_FMA, + ISD::STRICT_FCEIL, + ISD::STRICT_FFLOOR, + ISD::STRICT_FSQRT, + ISD::STRICT_FRINT, + ISD::STRICT_FNEARBYINT, + ISD::STRICT_FROUND, + ISD::STRICT_FTRUNC, + ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FMINNUM, + ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, + ISD::STRICT_FMAXIMUM}) setOperationAction(Op, MVT::f16, Promote); // Round-to-integer need custom lowering for fp16, as Promote doesn't work @@ -628,53 +652,53 @@ setOperationAction(Op, MVT::f16, Custom); // promote v4f16 to v4f32 when that is known to be safe. 
- setOperationAction(ISD::FADD, MVT::v4f16, Promote); - setOperationAction(ISD::FSUB, MVT::v4f16, Promote); - setOperationAction(ISD::FMUL, MVT::v4f16, Promote); - setOperationAction(ISD::FDIV, MVT::v4f16, Promote); - AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); - - setOperationAction(ISD::FABS, MVT::v4f16, Expand); - setOperationAction(ISD::FNEG, MVT::v4f16, Expand); - setOperationAction(ISD::FROUND, MVT::v4f16, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); - setOperationAction(ISD::FMA, MVT::v4f16, Expand); - setOperationAction(ISD::SETCC, MVT::v4f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); - setOperationAction(ISD::FRINT, MVT::v4f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); - - setOperationAction(ISD::FABS, MVT::v8f16, Expand); - setOperationAction(ISD::FADD, MVT::v8f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); - setOperationAction(ISD::FDIV, MVT::v8f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); - setOperationAction(ISD::FMA, MVT::v8f16, Expand); - setOperationAction(ISD::FMUL, MVT::v8f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); - setOperationAction(ISD::FNEG, MVT::v8f16, Expand); - setOperationAction(ISD::FROUND, MVT::v8f16, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); - setOperationAction(ISD::FRINT, MVT::v8f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); - setOperationAction(ISD::FSUB, MVT::v8f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); - setOperationAction(ISD::SETCC, MVT::v8f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v4f16, Promote); + setOperationAction(ISD::FSUB, MVT::v4f16, Promote); + setOperationAction(ISD::FMUL, MVT::v4f16, Promote); + setOperationAction(ISD::FDIV, MVT::v4f16, Promote); + AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); + + setOperationAction(ISD::FABS, MVT::v4f16, Expand); + setOperationAction(ISD::FNEG, MVT::v4f16, Expand); + setOperationAction(ISD::FROUND, MVT::v4f16, Expand); + setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); + setOperationAction(ISD::FMA, MVT::v4f16, Expand); + setOperationAction(ISD::SETCC, MVT::v4f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); + 
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); + setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); + setOperationAction(ISD::FRINT, MVT::v4f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); + + setOperationAction(ISD::FABS, MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV, MVT::v8f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); + setOperationAction(ISD::FMA, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); + setOperationAction(ISD::FNEG, MVT::v8f16, Expand); + setOperationAction(ISD::FROUND, MVT::v8f16, Expand); + setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); + setOperationAction(ISD::FRINT, MVT::v8f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); + setOperationAction(ISD::SETCC, MVT::v8f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); } // AArch64 has implementations of a lot of rounding-like FP operations. @@ -1025,8 +1049,8 @@ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); } - setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); - setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal); setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal); setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom); @@ -1048,8 +1072,8 @@ setOperationAction(ISD::MUL, MVT::v2i64, Custom); // Saturates - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); @@ -1067,8 +1091,8 @@ } // Vector reductions - for (MVT VT : { MVT::v4f16, MVT::v2f32, - MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { + for (MVT VT : + {MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64}) { if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); @@ -1076,8 +1100,8 @@ setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } } - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, - MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { + for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32}) { setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); @@ -1129,18 +1153,18 @@ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); 
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); // ADDP custom lowering - for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::ADD, VT, Custom); // FADDP custom lowering - for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 }) + for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64}) setOperationAction(ISD::FADD, VT, Custom); } @@ -1229,8 +1253,8 @@ setOperationAction(ISD::BITCAST, VT, Custom); for (auto VT : - { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, - MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) + {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, + MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16}) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); for (auto VT : @@ -1428,8 +1452,8 @@ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); // Int operations with no NEON support. - for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, - MVT::v2i32, MVT::v4i32, MVT::v2i64}) { + for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, + MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::BITREVERSE, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); @@ -1534,8 +1558,7 @@ // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP // NEON types. - if (VT.isFloatingPoint() && - VT.getVectorElementType() != MVT::bf16 && + if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 && (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) for (unsigned Opcode : {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM, @@ -1830,8 +1853,8 @@ if (NewImm == 0 || NewImm == OrigMask) { New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), TLO.DAG.getConstant(NewImm, DL, VT)); - // Otherwise, create a machine node so that target independent DAG combine - // doesn't undo this optimization. + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. } else { Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); @@ -1949,7 +1972,8 @@ ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { - default: return; + default: + return; case Intrinsic::aarch64_ldaxr: case Intrinsic::aarch64_ldxr: { unsigned BitWidth = Known.getBitWidth(); @@ -2412,8 +2436,9 @@ return EndBB; } -MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( - MachineInstr &MI, MachineBasicBlock *BB) const { +MachineBasicBlock * +AArch64TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, + MachineBasicBlock *BB) const { assert(!isAsynchronousEHPersonality(classifyEHPersonality( BB->getParent()->getFunction().getPersonalityFn())) && "SEH does not use catchret!"); @@ -2934,10 +2959,9 @@ // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
- const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl, - DAG.getVTList(VT, MVT_CC), - LHS.getOperand(0), - LHS.getOperand(1)); + const SDValue ANDSNode = + DAG.getNode(AArch64ISD::ANDS, dl, DAG.getVTList(VT, MVT_CC), + LHS.getOperand(0), LHS.getOperand(1)); // Replace all users of (and X, Y) with newly generated (ands X, Y) DAG.ReplaceAllUsesWith(LHS, ANDSNode); return ANDSNode.getValue(1); @@ -3073,11 +3097,11 @@ SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3114,8 +3138,8 @@ /// \p Negate is true if we want this sub-tree being negated just by changing /// SETCC conditions. static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, - AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, - AArch64CC::CondCode Predicate) { + AArch64CC::CondCode &OutCC, bool Negate, + SDValue CCOp, AArch64CC::CondCode Predicate) { // We're at a tree leaf, produce a conditional comparison operation. unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { @@ -3309,8 +3333,7 @@ case ISD::SETGT: if ((VT == MVT::i32 && C != INT32_MAX && isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != INT64_MAX && - isLegalArithImmed(C + 1ULL))) { + (VT == MVT::i64 && C != INT64_MAX && isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); @@ -3381,9 +3404,9 @@ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); + Cmp = emitComparison( + SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl, + DAG); AArch64CC = changeIntCCToAArch64CC(CC); } } @@ -3475,10 +3498,9 @@ } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(AArch64ISD::SUBS, DL, VTs, - DAG.getConstant(0, DL, MVT::i64), - UpperBits).getValue(1); + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, + DAG.getConstant(0, DL, MVT::i64), UpperBits) + .getValue(1); } break; } @@ -3660,8 +3682,8 @@ // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); - Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, - CCVal, Overflow); + Overflow = + DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -3690,10 +3712,10 @@ } // built the mask value encoding the expected behavior. 
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit - (!IsData << 3) | // IsDataCache bit - (Locality << 1) | // Cache level bits - (unsigned)IsStream; // Stream bit + unsigned PrfOp = (IsWrite << 4) | // Load/Store bit + (!IsData << 3) | // IsDataCache bit + (Locality << 1) | // Cache level bits + (unsigned)IsStream; // Stream bit return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); } @@ -3757,8 +3779,7 @@ unsigned NumElts = InVT.getVectorNumElements(); // f16 conversions are promoted to f32 when full fp16 is not supported. - if (InVT.getVectorElementType() == MVT::f16 && - !Subtarget->hasFullFP16()) { + if (InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); if (IsStrict) { @@ -3840,9 +3861,8 @@ return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other}, {Ext.getValue(1), Ext.getValue(0)}); } - return DAG.getNode( - Op.getOpcode(), dl, Op.getValueType(), - DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); } if (SrcVal.getValueType() != MVT::f128) { @@ -4015,8 +4035,7 @@ MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); if (IsStrict) { - In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, - {Op.getOperand(0), In}); + In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, {Op.getOperand(0), In}); return DAG.getNode( ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); @@ -4038,9 +4057,9 @@ // Use a scalar operation for conversions between single-element vectors of // the same size. if (VT.getVectorNumElements() == 1) { - SDValue Extract = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), - In, DAG.getConstant(0, dl, MVT::i64)); + SDValue Extract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), In, + DAG.getConstant(0, dl, MVT::i64)); EVT ScalarVT = VT.getScalarType(); if (IsStrict) return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, @@ -4052,7 +4071,7 @@ } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); @@ -4069,10 +4088,9 @@ ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other}, {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)}); } - return DAG.getNode( - ISD::FP_ROUND, dl, MVT::f16, - DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), + DAG.getIntPtrConstant(0, dl)); } // i128 conversions are libcalls. @@ -4104,8 +4122,8 @@ Entry.IsZExt = false; Args.push_back(Entry); - RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 - : RTLIB::SINCOS_STRET_F32; + RTLIB::Libcall LC = + ArgVT == MVT::f64 ? 
RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; const char *LibcallName = getLibcallName(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); @@ -4176,12 +4194,13 @@ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Vector Type"); + default: + llvm_unreachable("Unexpected Vector Type"); case MVT::v2i8: case MVT::v2i16: - return MVT::v2i32; + return MVT::v2i32; case MVT::v4i8: - return MVT::v4i16; + return MVT::v4i16; } } @@ -4249,10 +4268,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) - return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG, - N->getOperand(0)->getValueType(0), - N->getValueType(0), - N->getOpcode()); + return addRequiredExtensionForVectorMULL( + N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), + N->getValueType(0), N->getOpcode()); assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); @@ -4288,8 +4306,8 @@ if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); - return N0->hasOneUse() && N1->hasOneUse() && - isSignExtended(N0, DAG) && isSignExtended(N1, DAG); + return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) && + isSignExtended(N1, DAG); } return false; } @@ -4299,8 +4317,8 @@ if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); - return N0->hasOneUse() && N1->hasOneUse() && - isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); + return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) && + isZeroExtended(N1, DAG); } return false; } @@ -4405,11 +4423,11 @@ NewOpc = AArch64ISD::SMULL; isMLA = true; } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { - NewOpc = AArch64ISD::UMULL; + NewOpc = AArch64ISD::UMULL; isMLA = true; } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { std::swap(N0, N1); - NewOpc = AArch64ISD::UMULL; + NewOpc = AArch64ISD::UMULL; isMLA = true; } } @@ -4441,11 +4459,12 @@ SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG); SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); - return DAG.getNode(N0->getOpcode(), DL, VT, - DAG.getNode(NewOpc, DL, VT, - DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), - DAG.getNode(NewOpc, DL, VT, - DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); + return DAG.getNode( + N0->getOpcode(), DL, VT, + DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), + Op1), + DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), + Op1)); } static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, @@ -4537,12 +4556,14 @@ } } -SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); switch (IntNo) { - default: return SDValue(); // Don't custom lower most intrinsics. + default: + return SDValue(); // Don't custom lower most intrinsics. 
case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); @@ -4550,8 +4571,8 @@ case Intrinsic::aarch64_neon_abs: { EVT Ty = Op.getValueType(); if (Ty == MVT::i64) { - SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, - Op.getOperand(1)); + SDValue Result = + DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Op.getOperand(1)); Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result); return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result); } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) { @@ -4584,17 +4605,17 @@ Op2); } case Intrinsic::aarch64_neon_smax: - return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_umax: - return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_smin: - return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_umin: - return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_sve_sunpkhi: return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), @@ -4624,8 +4645,8 @@ return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(), Op.getOperand(1)); case Intrinsic::aarch64_sve_tbl: - return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_sve_trn1: return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); @@ -4697,8 +4718,9 @@ return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frinti: - return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); case Intrinsic::aarch64_sve_frintx: return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4706,8 +4728,9 @@ return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frintn: - return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); case Intrinsic::aarch64_sve_frintz: return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4720,13 +4743,11 @@ Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fcvtzu: - return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, 
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fcvtzs: - return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fsqrt: return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4868,8 +4889,8 @@ } case Intrinsic::aarch64_neon_sabd: case Intrinsic::aarch64_neon_uabd: { - unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU - : ISD::ABDS; + unsigned Opcode = + IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU : ISD::ABDS; return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } @@ -5185,9 +5206,8 @@ } // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16. -static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, - EVT VT, EVT MemVT, - SelectionDAG &DAG) { +static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, + EVT MemVT, SelectionDAG &DAG) { assert(VT.isVector() && "VT should be a vector type"); assert(MemVT == MVT::v4i8 && VT == MVT::v4i16); @@ -5201,29 +5221,28 @@ // str s0, [x0] SDValue Undef = DAG.getUNDEF(MVT::i16); - SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL, - {Undef, Undef, Undef, Undef}); + SDValue UndefVec = + DAG.getBuildVector(MVT::v4i16, DL, {Undef, Undef, Undef, Undef}); - SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, - Value, UndefVec); + SDValue TruncExt = + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Value, UndefVec); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt); Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc); SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Trunc, DAG.getConstant(0, DL, MVT::i64)); - return DAG.getStore(ST->getChain(), DL, ExtractTrunc, - ST->getBasePtr(), ST->getMemOperand()); + return DAG.getStore(ST->getChain(), DL, ExtractTrunc, ST->getBasePtr(), + ST->getMemOperand()); } // Custom lowering for any store, vector or scalar and/or default or with // a truncate operations. Currently only custom lower truncate operation // from vector v4i16 to v4i8 or volatile stores of i128. 
-SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc Dl(Op); StoreSDNode *StoreNode = cast(Op); - assert (StoreNode && "Can only custom lower store nodes"); + assert(StoreNode && "Can only custom lower store nodes"); SDValue Value = StoreNode->getValue(); @@ -5283,8 +5302,8 @@ SDValue Base = StoreNode->getBasePtr(); EVT PtrVT = Base.getValueType(); for (unsigned i = 0; i < 8; i++) { - SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, - Value, DAG.getConstant(i, Dl, MVT::i32)); + SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, Value, + DAG.getConstant(i, Dl, MVT::i32)); SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base, DAG.getConstant(i * 8, Dl, PtrVT)); Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(), @@ -5321,8 +5340,7 @@ return Result; } -SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *LoadNode = cast(Op); assert(LoadNode && "Expected custom lowering of a load node"); @@ -5335,9 +5353,9 @@ for (unsigned i = 0; i < 8; i++) { SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, DAG.getConstant(i * 8, DL, PtrVT)); - SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr, - LoadNode->getPointerInfo(), - LoadNode->getOriginalAlign()); + SDValue Part = + DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->getOriginalAlign()); Ops.push_back(Part); Chain = SDValue(Part.getNode(), 1); } @@ -5385,9 +5403,8 @@ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(0)); // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); + SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), + Op.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), Cmp.getValue(1)); @@ -5793,13 +5810,13 @@ return CC_AArch64_DarwinPCS; return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg : CC_AArch64_DarwinPCS_VarArg; - case CallingConv::Win64: + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; - case CallingConv::CFGuard_Check: - return CC_AArch64_Win64_CFGuard_Check; - case CallingConv::AArch64_VectorCall: - case CallingConv::AArch64_SVE_VectorCall: - return CC_AArch64_AAPCS; + case CallingConv::CFGuard_Check: + return CC_AArch64_Win64_CFGuard_Check; + case CallingConv::AArch64_VectorCall: + case CallingConv::AArch64_SVE_VectorCall: + return CC_AArch64_AAPCS; } } @@ -5822,7 +5839,8 @@ SmallVector Outs; GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs, DAG.getTargetLoweringInfo(), MF.getDataLayout()); - if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); })) + if (any_of(Outs, + [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); })) FuncInfo->setIsSVECC(true); // Assign locations to all of the incoming arguments. @@ -5876,10 +5894,10 @@ int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; - // FIXME: This works on big-endian for composite byvals, which are the common - // case. It should also work for fundamental types too. 
+ // FIXME: This works on big-endian for composite byvals, which are the + // common case. It should also work for fundamental types too. unsigned FrameIdx = - MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); + MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); InVals.push_back(FrameIdxN); @@ -5951,7 +5969,8 @@ unsigned ArgOffset = VA.getLocMemOffset(); unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect ? VA.getLocVT().getSizeInBits() - : VA.getValVT().getSizeInBits()) / 8; + : VA.getValVT().getSizeInBits()) / + 8; uint32_t BEAlign = 0; if (!Subtarget->isLittleEndian() && ArgSize < 8 && @@ -5997,7 +6016,7 @@ if (VA.getLocInfo() == CCValAssign::Indirect) { assert(VA.getValVT().isScalableVector() && - "Only scalable vectors can be passed indirectly"); + "Only scalable vectors can be passed indirectly"); uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); unsigned NumParts = 1; @@ -6056,8 +6075,8 @@ // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. - // Win64 variadic functions also pass arguments in registers, but all float - // arguments are passed in integer registers. + // Win64 variadic functions also pass arguments in registers, but all + // float arguments are passed in integer registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -6073,7 +6092,7 @@ RegParmTypes.push_back(MVT::f128); // Compute the set of forwarded registers. The rest are scratch. SmallVectorImpl &Forwards = - FuncInfo->getForwardedMustTailRegParms(); + FuncInfo->getForwardedMustTailRegParms(); CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_AArch64_AAPCS); @@ -6139,13 +6158,14 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); + bool IsWin64 = + Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); SmallVector MemOps; - static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7 }; + static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); @@ -6156,7 +6176,8 @@ GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); if (GPRSaveSize & 15) // The extra size here, if triggered, will always be 8. - MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); + MFI.CreateFixedObject(16 - (GPRSaveSize & 15), + -(int)alignTo(GPRSaveSize, 16), false); } else GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); @@ -6324,9 +6345,9 @@ } } else { // Get type of the original argument. - EVT ActualVT = - TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); + EVT ActualVT = TLI.getValueType(DAG.getDataLayout(), + CLI.Args[Outs[i].OrigArgIndex].Ty, + /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. 
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) @@ -6394,7 +6415,8 @@ return false; } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) return CCMatch; // Externally-defined functions with weak linkage should not be @@ -6450,10 +6472,11 @@ analyzeCallOperands(*this, Subtarget, CLI, CCInfo); if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) { - // When we are musttail, additional checks have been done and we can safely ignore this check - // At least two cases here: if caller is fastcc then we can't have any - // memory arguments (we'd be expected to clean up the stack afterwards). If - // caller is C then we could potentially use its argument area. + // When we are musttail, additional checks have been done and we can safely + // ignore this check At least two cases here: if caller is fastcc then we + // can't have any memory arguments (we'd be expected to clean up the stack + // afterwards). If caller is C then we could potentially use its argument + // area. // FIXME: for now we take the most conservative of these in both cases: // disallow all variadic memory operands. @@ -6570,12 +6593,10 @@ // Check callee args/returns for SVE registers and set calling convention // accordingly. if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) { - bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ - return Out.VT.isScalableVector(); - }); - bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ - return In.VT.isScalableVector(); - }); + bool CalleeOutSVE = any_of( + Outs, [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }); + bool CalleeInSVE = + any_of(Ins, [](ISD::InputArg &In) { return In.VT.isScalableVector(); }); if (CalleeInSVE || CalleeOutSVE) CallConv = CallingConv::AArch64_SVE_VectorCall; @@ -6673,7 +6694,7 @@ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.emplace_back(F.PReg, Val); + RegsToPass.emplace_back(F.PReg, Val); } } @@ -6892,8 +6913,8 @@ // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (auto &RegToPass : RegsToPass) { - Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, - RegToPass.second, InFlag); + Chain = + DAG.getCopyToReg(Chain, DL, RegToPass.first, RegToPass.second, InFlag); InFlag = Chain.getValue(1); } @@ -6947,8 +6968,8 @@ // Add argument registers to the end of the list so that they are known live // into the call. for (auto &RegToPass : RegsToPass) - Ops.push_back(DAG.getRegister(RegToPass.first, - RegToPass.second.getValueType())); + Ops.push_back( + DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
const uint32_t *Mask; @@ -7118,7 +7139,7 @@ Flag = Chain.getValue(1); RetOps.push_back( - DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); + DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -7167,7 +7188,7 @@ N->getOffset(), Flag); } -SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); @@ -7375,8 +7396,7 @@ HiVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); - return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, - LoVar, + return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, LoVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); } @@ -7583,8 +7603,8 @@ // The pointer to the thread's TLS data area is at the TLS Index scaled by 8 // offset into the TLSArray. TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex); - SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, - DAG.getConstant(3, DL, PtrVT)); + SDValue Slot = + DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(3, DL, PtrVT)); SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), MachinePointerInfo()); @@ -8291,7 +8311,7 @@ else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE) FVal = LHS; } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) { - assert (CTVal && CFVal && "Expected constant operands for CSNEG."); + assert(CTVal && CFVal && "Expected constant operands for CSNEG."); // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to // avoid materializing C. AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); @@ -8501,8 +8521,7 @@ return getAddr(JT, DAG); } -SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. SDLoc DL(Op); @@ -8538,7 +8557,7 @@ } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { @@ -8550,7 +8569,7 @@ } SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { AArch64FunctionInfo *FuncInfo = DAG.getMachineFunction().getInfo(); @@ -8676,9 +8695,9 @@ SDLoc DL(Op); unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; unsigned VaListSize = - (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - ? PtrSize - : Subtarget->isTargetILP32() ? 20 : 32; + (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) ? PtrSize + : Subtarget->isTargetILP32() ? 20 + : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); @@ -8750,7 +8769,7 @@ SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), DAG.getIntPtrConstant(1, DL, /*isTarget=*/true)); - SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; + SDValue Ops[] = {NarrowFP, WideFP.getValue(1)}; // Merge the rounded value with the chain output of the load. return DAG.getMergeValues(Ops, DL); } @@ -8794,8 +8813,9 @@ // FIXME? 
Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -Register AArch64TargetLowering:: -getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { +Register +AArch64TargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { Register Reg = MatchRegisterName(RegName); if (AArch64::X1 <= Reg && Reg <= AArch64::X28) { const MCRegisterInfo *MRI = Subtarget->getRegisterInfo(); @@ -8805,8 +8825,8 @@ } if (Reg) return Reg; - report_fatal_error(Twine("Invalid register name \"" - + StringRef(RegName) + "\".")); + report_fatal_error( + Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op, @@ -8904,14 +8924,14 @@ // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the // movw+movk is fused). So we limit up to 2 instrdduction at most. SmallVector Insn; - AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), - Insn); + AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn); unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2)); IsLegal = Insn.size() <= Limit; } LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString() - << " imm value: "; Imm.dump();); + << " imm value: "; + Imm.dump();); return IsLegal; } @@ -8977,8 +8997,8 @@ // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) for (int i = ExtraSteps; i > 0; --i) { - SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, - Flags); + SDValue Step = + DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, Flags); Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } @@ -9007,8 +9027,8 @@ // Newton reciprocal iteration: E * (2 - X * E) // AArch64 reciprocal iteration instruction: (2 - M * N) for (int i = ExtraSteps; i > 0; --i) { - SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, - Estimate, Flags); + SDValue Step = + DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, Estimate, Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } @@ -9060,19 +9080,14 @@ if (ConstraintVT.isFloatingPoint()) return "w"; - if (ConstraintVT.isVector() && - (ConstraintVT.getSizeInBits() == 64 || - ConstraintVT.getSizeInBits() == 128)) + if (ConstraintVT.isVector() && (ConstraintVT.getSizeInBits() == 64 || + ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } -enum PredicateConstraint { - Upl, - Upa, - Invalid -}; +enum PredicateConstraint { Upl, Upa, Invalid }; static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { PredicateConstraint P = PredicateConstraint::Invalid; @@ -9114,7 +9129,7 @@ } } else if (parsePredicateConstraint(Constraint) != PredicateConstraint::Invalid) - return C_RegisterClass; + return C_RegisterClass; return TargetLowering::getConstraintType(Constraint); } @@ -9473,10 +9488,10 @@ int WindowScale; ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), + : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} - bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } + bool operator==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -9562,8 
+9577,8 @@ TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32)); assert((Mask.size() == 8 || Mask.size() == 16) && "Expected a v8i8 or v16i8 Mask"); - TBLOperands.push_back( - DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask)); + TBLOperands.push_back(DAG.getBuildVector( + Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask)); SDValue Shuffle = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, @@ -9654,14 +9669,15 @@ if (!SrcVT.is64BitVector()) { LLVM_DEBUG( - dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " - "for SVE vectors."); + dbgs() + << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " + "for SVE vectors."); return SDValue(); } - Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, - VEXTSrc2, - DAG.getConstant(Imm, dl, MVT::i32)); + Src.ShuffleVec = + DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2, + DAG.getConstant(Imm, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } @@ -9700,8 +9716,8 @@ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); - int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), - VT.getScalarSizeInBits()); + int BitsDefined = + std::min(OrigEltTy.getScalarSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, @@ -9720,12 +9736,12 @@ return SDValue(); } - SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; + SDValue ShuffleOps[] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; - SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], - ShuffleOps[1], Mask); + SDValue Shuffle = + DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump(); @@ -9768,7 +9784,8 @@ // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from // v4i32s. This is really a truncate, which we can construct out of (legal) // concats and truncate nodes. -static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) { +static SDValue ReconstructTruncateFromBuildVector(SDValue V, + SelectionDAG &DAG) { if (V.getValueType() != MVT::v16i8) return SDValue(); assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR"); @@ -10057,8 +10074,8 @@ return true; } -static bool isINSMask(ArrayRef M, int NumInputElements, - bool &DstIsLeft, int &Anomaly) { +static bool isINSMask(ArrayRef M, int NumInputElements, bool &DstIsLeft, + int &Anomaly) { if (M.size() != static_cast(NumInputElements)) return false; @@ -10146,11 +10163,11 @@ /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. ID is the perfect-shuffle -//ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle -//table entry and LHS/RHS are the immediate inputs for this stage of the -//shuffle. -static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, - SDValue V2, unsigned PFEntry, SDValue LHS, +// ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle +// table entry and LHS/RHS are the immediate inputs for this stage of the +// shuffle. 
+static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, + unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; @@ -10385,8 +10402,9 @@ Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst, - V2Cst, DAG.getBuildVector(IndexVT, DL, - makeArrayRef(TBLMask.data(), IndexLen))); + V2Cst, + DAG.getBuildVector(IndexVT, DL, + makeArrayRef(TBLMask.data(), IndexLen))); } } return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); @@ -10591,9 +10609,9 @@ for (unsigned LaneSize : {64U, 32U, 16U}) { unsigned Lane = 0; if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) { - unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 - : LaneSize == 32 ? AArch64ISD::DUPLANE32 - : AArch64ISD::DUPLANE16; + unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 + : LaneSize == 32 ? AArch64ISD::DUPLANE32 + : AArch64ISD::DUPLANE16; // Cast V1 to an integer vector with required lane size MVT NewEltTy = MVT::getIntegerVT(LaneSize); unsigned NewEltCount = VT.getSizeInBits() / LaneSize; @@ -10793,7 +10811,6 @@ return DAG.getNode(ISD::BITCAST, DL, VT, TBL); } - static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); @@ -10818,7 +10835,7 @@ // Try 64-bit splatted SIMD immediate. static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, - const APInt &Bits) { + const APInt &Bits) { if (Bits.getHiBits(64) == Bits.getLoBits(64)) { uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); EVT VT = Op.getValueType(); @@ -10828,8 +10845,8 @@ Value = AArch64_AM::encodeAdvSIMDModImmType10(Value); SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10851,16 +10868,13 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType1(Value); Shift = 0; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType2(Value); Shift = 8; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType3(Value); Shift = 16; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType4(Value); Shift = 24; } @@ -10874,9 +10888,9 @@ DAG.getConstant(Value, dl, MVT::i32), DAG.getConstant(Shift, dl, MVT::i32)); else - Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -10899,8 +10913,7 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType5(Value); Shift = 0; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) { Value = 
AArch64_AM::encodeAdvSIMDModImmType6(Value); Shift = 8; } @@ -10914,9 +10927,9 @@ DAG.getConstant(Value, dl, MVT::i32), DAG.getConstant(Shift, dl, MVT::i32)); else - Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -10938,17 +10951,16 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType7(Value); Shift = 264; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType8(Value); Shift = 272; } if (isAdvSIMDModImm) { SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10968,8 +10980,8 @@ Value = AArch64_AM::encodeAdvSIMDModImmType9(Value); SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10990,17 +11002,16 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType11(Value); MovTy = isWide ? MVT::v4f32 : MVT::v2f32; - } - else if (isWide && - (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) { + } else if (isWide && + (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType12(Value); MovTy = MVT::v2f64; } if (isAdvSIMDModImm) { SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -11143,16 +11154,15 @@ if (resolveBuildVector(BVN, DefBits, UndefBits)) { SDValue NewOp; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, - DefBits, &LHS)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, - DefBits, &LHS))) + if ((NewOp = + tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)) || + (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS))) return NewOp; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, - UndefBits, &LHS)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, - UndefBits, &LHS))) + if ((NewOp = + tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)) || + (NewOp = + tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS))) return NewOp; } @@ -11162,12 +11172,11 @@ // Normalize the operands of BUILD_VECTOR. The value of constant operands will // be truncated to fit element width. 
-static SDValue NormalizeBuildVector(SDValue Op, - SelectionDAG &DAG) { +static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); - EVT EltTy= VT.getVectorElementType(); + EVT EltTy = VT.getVectorElementType(); if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16) return Op; @@ -11179,8 +11188,7 @@ // (with operands cast to integers), then the only possibilities // are constants and UNDEFs. if (auto *CstLane = dyn_cast(Lane)) { - APInt LowBits(EltTy.getSizeInBits(), - CstLane->getZExtValue()); + APInt LowBits(EltTy.getSizeInBits(), CstLane->getZExtValue()); Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); } else if (Lane.getNode()->isUndef()) { Lane = DAG.getUNDEF(MVT::i32); @@ -11431,8 +11439,9 @@ if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; EVT EltTy = VT.getVectorElementType(); - assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 || - EltTy == MVT::f64) && "Unsupported floating-point vector type"); + assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 || + EltTy == MVT::f64) && + "Unsupported floating-point vector type"); LLVM_DEBUG( dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int " "BITCASTS, and try again\n"); @@ -11971,8 +11980,9 @@ APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || + if (!BVN || + !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElementBits) || SplatBitSize > ElementBits) return false; Cnt = SplatBits.getSExtValue(); @@ -12043,10 +12053,10 @@ if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), DAG.getConstant(Cnt, DL, MVT::i32)); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, - MVT::i32), - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32), + Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) { @@ -12071,10 +12081,9 @@ // negate the shift amount SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(1)); - SDValue NegShiftLeft = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0), - NegShift); + SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Opc, DL, MVT::i32), + Op.getOperand(0), NegShift); return NegShiftLeft; } @@ -12219,7 +12228,7 @@ } assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) || - LHS.getValueType().getVectorElementType() != MVT::f128); + LHS.getValueType().getVectorElementType() != MVT::f128); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. 
@@ -12227,15 +12236,14 @@ bool ShouldInvert; changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); - SDValue Cmp = - EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); + bool NoNaNs = + getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); + SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); if (!Cmp.getNode()) return SDValue(); if (CC2 != AArch64CC::AL) { - SDValue Cmp2 = - EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG); + SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG); if (!Cmp2.getNode()) return SDValue(); @@ -12321,14 +12329,12 @@ case ISD::VECREDUCE_FMAX: { return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), - Src); + DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), Src); } case ISD::VECREDUCE_FMIN: { return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), - Src); + DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), Src); } default: llvm_unreachable("Unhandled reduction"); @@ -12650,7 +12656,7 @@ // The shift can be combined if it matches the size of the value being // loaded (and so reducing the width would make it not match). uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1); - uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8; + uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits() / 8; if (ShiftAmount == Log2_32(LoadBytes)) return false; } @@ -12695,10 +12701,10 @@ const DataLayout &DL = F->getParent()->getDataLayout(); Type *Ty = User->getOperand(0)->getType(); - return !(isFMAFasterThanFMulAndFAdd(*F, Ty) && - isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && - (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath)); + return !( + isFMAFasterThanFMulAndFAdd(*F, Ty) && + isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); } // All 32-bit GPR operations implicitly zero the high-half of the corresponding @@ -12757,14 +12763,15 @@ case Instruction::GetElementPtr: { gep_type_iterator GTI = gep_type_begin(Instr); auto &DL = Ext->getModule()->getDataLayout(); - std::advance(GTI, U.getOperandNo()-1); + std::advance(GTI, U.getOperandNo() - 1); Type *IdxTy = GTI.getIndexedType(); // This extension will end up with a shift because of the scaling factor. // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - + 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) @@ -13363,6 +13370,13 @@ auto Mask = SVI->getShuffleMask(); + // Bail out if every index in the mask is undef: when the shufflevector + // mask is `undef` or `poison`, `Mask` is a vector of all -1s, and carrying + // on would lead to an out-of-bounds read later. + if (llvm::all_of(Mask, [](int Idx) { return Idx == UndefMaskElem; })) { + return false; + } + Type *PtrTy = UseScalable ?
STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()) @@ -13576,8 +13590,8 @@ } // Same encoding for add/sub, just flip the sign. Immed = std::abs(Immed); - bool IsLegal = ((Immed >> 12) == 0 || - ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); + bool IsLegal = + ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); LLVM_DEBUG(dbgs() << "Is " << Immed << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n"); return IsLegal; @@ -13620,7 +13634,8 @@ /// by AM is legal for this target, for a load/store of the specified type. bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS, Instruction *I) const { + unsigned AS, + Instruction *I) const { // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset @@ -13738,15 +13753,13 @@ // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. - static const MCPhysReg ScratchRegs[] = { - AArch64::X16, AArch64::X17, AArch64::LR, 0 - }; + static const MCPhysReg ScratchRegs[] = {AArch64::X16, AArch64::X17, + AArch64::LR, 0}; return ScratchRegs; } -bool -AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, - CombineLevel Level) const { +bool AArch64TargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "Expected shift op"); @@ -13754,8 +13767,8 @@ SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine - // it with shift 'N' to let it be lowered to UBFX. + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not + // combine it with shift 'N' to let it be lowered to UBFX. if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && isa(ShiftLHS.getOperand(1))) { uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); @@ -14063,7 +14076,6 @@ return SDValue(); } - static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -14079,7 +14091,7 @@ SmallVectorImpl &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) - return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(N, 0); // Lower SDIV as SDIV EVT VT = N->getValueType(0); @@ -14177,7 +14189,7 @@ } static bool IsSVECntIntrinsic(SDValue S) { - switch(getIntrinsicID(S.getNode())) { + switch (getIntrinsicID(S.getNode())) { default: break; case Intrinsic::aarch64_sve_cntb: @@ -14409,11 +14421,10 @@ // Allow the scaling to be folded into the `cnt` instruction by preventing // the scaling to be obscured here. This makes it easier to pattern match. - if (IsSVECntIntrinsic(N0) || - (N0->getOpcode() == ISD::TRUNCATE && - (IsSVECntIntrinsic(N0->getOperand(0))))) - if (ConstValue.sge(1) && ConstValue.sle(16)) - return SDValue(); + if (IsSVECntIntrinsic(N0) || (N0->getOpcode() == ISD::TRUNCATE && + (IsSVECntIntrinsic(N0->getOperand(0))))) + if (ConstValue.sge(1) && ConstValue.sle(16)) + return SDValue(); // Multiplication of a power of two plus/minus one can be done more // cheaply as as shift+add/sub. For now, this is true unilaterally. 
If @@ -14565,7 +14576,8 @@ // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); - if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && + N0.hasOneUse() && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); @@ -14577,8 +14589,8 @@ // to use the new Chain. DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); - unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; + unsigned Opcode = (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF + : AArch64ISD::UITOF; return DAG.getNode(Opcode, SDLoc(N), VT, Load); } @@ -14691,7 +14703,8 @@ BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); - int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); + int32_t C = + BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); if (C == -1 || C == 0 || C > FloatBits) return SDValue(); @@ -14790,7 +14803,7 @@ } static SDValue tryCombineToBSL(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); @@ -15034,8 +15047,8 @@ Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0), DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32)); - SDValue And = DAG.getNode(ISD::AND, DL, - UnpkOp->getValueType(0), UnpkOp, Dup); + SDValue And = + DAG.getNode(ISD::AND, DL, UnpkOp->getValueType(0), UnpkOp, Dup); return DAG.getNode(Opc, DL, N->getValueType(0), And); } @@ -15295,8 +15308,7 @@ // with the strict_fadd, but we also need uses of the chain output of the // original strict_fadd to use the chain output of the new strict_fadd as // otherwise it may not be deleted. - SDValue Ret = DAG.getNode(N0->getOpcode(), DL, - {VT, MVT::Other}, + SDValue Ret = DAG.getNode(N0->getOpcode(), DL, {VT, MVT::Other}, {N0->getOperand(0), Extract1, Extract2}); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1)); @@ -15373,9 +15385,8 @@ Ops.push_back(DAG.getUNDEF(MVT::f32)); else { LoadSDNode *LD = cast(V); - SDValue NewLoad = - DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(), - LD->getMemOperand()); + SDValue NewLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), + LD->getBasePtr(), LD->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); Ops.push_back(NewLoad); } @@ -15385,7 +15396,6 @@ } } - // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) @@ -15731,11 +15741,11 @@ } // Returns true if Op is setcc or zext of setcc. 
-static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) { +static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info) { if (isSetCC(Op, Info)) return true; return ((Op.getOpcode() == ISD::ZERO_EXTEND) && - isSetCC(Op->getOperand(0), Info)); + isSetCC(Op->getOperand(0), Info)); } // The folding we want to perform is: @@ -16154,7 +16164,8 @@ if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT)) return SDValue(); - SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); + SDValue SubvectorIdx = + DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext, @@ -16309,8 +16320,7 @@ SelectionDAG &DAG) { SDLoc dl(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), - DAG.getNode(Opc, dl, - N->getOperand(1).getSimpleValueType(), + DAG.getNode(Opc, dl, N->getOperand(1).getSimpleValueType(), N->getOperand(1)), DAG.getConstant(0, dl, MVT::i64)); } @@ -16412,7 +16422,7 @@ case Intrinsic::aarch64_sve_cmphs_wide: case Intrinsic::aarch64_sve_cmphi_wide: case Intrinsic::aarch64_sve_cmplo_wide: - case Intrinsic::aarch64_sve_cmpls_wide: { + case Intrinsic::aarch64_sve_cmpls_wide: { if (auto *CN = dyn_cast(Comparator.getOperand(0))) { uint64_t ImmVal = CN->getZExtValue(); if (ImmVal <= 127) @@ -16967,9 +16977,8 @@ uint64_t BaseOffset = 0; const MachinePointerInfo &PtrInfo = St.getPointerInfo(); - SDValue NewST1 = - DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, - OrigAlignment, St.getMemOperand()->getFlags()); + SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, + OrigAlignment, St.getMemOperand()->getFlags()); // As this in ISel, we will not merge this add which may degrade results. 
if (BasePtr->getOpcode() == ISD::ADD && @@ -17034,10 +17043,10 @@ ContainerVT = getSVEContainerType(ContainerVT); SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other); - SDValue Ops[] = { N->getOperand(0), // Chain - N->getOperand(2), // Pg - N->getOperand(3), // Base - DAG.getValueType(VT) }; + SDValue Ops[] = {N->getOperand(0), // Chain + N->getOperand(2), // Pg + N->getOperand(3), // Base + DAG.getValueType(VT)}; SDValue Load = DAG.getNode(Opc, DL, VTs, Ops); SDValue LoadChain = SDValue(Load.getNode(), 1); @@ -17045,7 +17054,7 @@ if (ContainerVT.isInteger() && (VT != ContainerVT)) Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0)); - return DAG.getMergeValues({ Load, LoadChain }, DL); + return DAG.getMergeValues({Load, LoadChain}, DL); } static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) { @@ -17059,16 +17068,16 @@ auto *MINode = cast(N); SDValue PassThru = DAG.getConstant(0, DL, LoadVT); - SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), - MINode->getOperand(3), DAG.getUNDEF(PtrTy), - MINode->getOperand(2), PassThru, - MINode->getMemoryVT(), MINode->getMemOperand(), - ISD::UNINDEXED, ISD::NON_EXTLOAD, false); + SDValue L = + DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), MINode->getOperand(3), + DAG.getUNDEF(PtrTy), MINode->getOperand(2), PassThru, + MINode->getMemoryVT(), MINode->getMemOperand(), + ISD::UNINDEXED, ISD::NON_EXTLOAD, false); - if (VT.isFloatingPoint()) { - SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) }; - return DAG.getMergeValues(Ops, DL); - } + if (VT.isFloatingPoint()) { + SDValue Ops[] = {DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1)}; + return DAG.getMergeValues(Ops, DL); + } return L; } @@ -17111,12 +17120,11 @@ else SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data); - SDValue Ops[] = { N->getOperand(0), // Chain - SrcNew, - N->getOperand(4), // Base - N->getOperand(3), // Pg - InputVT - }; + SDValue Ops[] = {N->getOperand(0), // Chain + SrcNew, + N->getOperand(4), // Base + N->getOperand(3), // Pg + InputVT}; return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops); } @@ -17268,7 +17276,7 @@ } // Check that all vector element locations were inserted to. if (IndexNotInserted.any()) - return SDValue(); + return SDValue(); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } @@ -17600,11 +17608,12 @@ SDValue Addr = LD->getOperand(1); SDValue Vector = N->getOperand(0); // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = - Addr.getNode()->use_end(); UI != UE; ++UI) { + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); + UI != UE; ++UI) { SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD - || UI.getUse().getResNo() != Addr.getResNo()) + if (User->getOpcode() != ISD::ADD || + UI.getUse().getResNo() != Addr.getResNo()) continue; // If the increment is a constant, it must match the memory ref size. 
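As the trailing comment notes, the base-update search above only folds an add of the address into a post-indexed access when the constant increment equals the size of the memory reference. A tiny standalone sketch of that rule (plain C++, not LLVM code; canFoldPostIncrement and the byte counts are illustrative assumptions):

#include <cassert>
#include <cstdint>

// The write-back form of a load/store advances the base by exactly the
// number of bytes accessed, so only a matching constant add can be folded.
static bool canFoldPostIncrement(uint64_t IncBytes, uint64_t AccessBytes) {
  return IncBytes == AccessBytes;
}

int main() {
  assert(canFoldPostIncrement(16, 16));   // e.g. a 16-byte access with a "#16" write-back
  assert(!canFoldPostIncrement(8, 16));   // mismatched increment stays a separate add
  return 0;
}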
@@ -17630,19 +17639,19 @@ continue; SmallVector Ops; - Ops.push_back(LD->getOperand(0)); // Chain + Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { - Ops.push_back(Vector); // The vector to be inserted - Ops.push_back(Lane); // The lane to be inserted in the vector + Ops.push_back(Vector); // The vector to be inserted + Ops.push_back(Lane); // The lane to be inserted in the vector } Ops.push_back(Addr); Ops.push_back(Inc); - EVT Tys[3] = { VT, MVT::i64, MVT::Other }; + EVT Tys[3] = {VT, MVT::i64, MVT::Other}; SDVTList SDTys = DAG.getVTList(Tys); - unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; - SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, - MemVT, + unsigned NewOp = + IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; + SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT, LoadSDN->getMemOperand()); // Update the uses. @@ -17651,8 +17660,8 @@ SDValue(UpdN.getNode(), 2) // Chain }; DCI.CombineTo(LD, NewResults); - DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result - DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register + DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result + DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register break; } @@ -17936,7 +17945,8 @@ // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + UE = Addr.getNode()->use_end(); + UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) @@ -17961,49 +17971,110 @@ unsigned NumVecs = 0; unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post; - NumVecs = 2; IsStore = true; break; - case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post; - NumVecs = 3; IsStore = true; break; - case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post; - NumVecs = 4; IsStore = true; break; - case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post; - NumVecs = 2; IsStore = true; break; - case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post; - NumVecs = 3; IsStore = true; break; - case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post; - NumVecs = 4; IsStore = true; break; - case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost; - NumVecs = 2; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost; - NumVecs = 3; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost; - NumVecs = 4; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost; - NumVecs = 2; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_ld3lane: NewOpc = 
AArch64ISD::LD3LANEpost; - NumVecs = 3; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost; - NumVecs = 4; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost; - NumVecs = 2; IsStore = true; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost; - NumVecs = 3; IsStore = true; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost; - NumVecs = 4; IsStore = true; IsLaneOp = true; break; + default: + llvm_unreachable("unexpected intrinsic for Neon base update"); + case Intrinsic::aarch64_neon_ld2: + NewOpc = AArch64ISD::LD2post; + NumVecs = 2; + break; + case Intrinsic::aarch64_neon_ld3: + NewOpc = AArch64ISD::LD3post; + NumVecs = 3; + break; + case Intrinsic::aarch64_neon_ld4: + NewOpc = AArch64ISD::LD4post; + NumVecs = 4; + break; + case Intrinsic::aarch64_neon_st2: + NewOpc = AArch64ISD::ST2post; + NumVecs = 2; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st3: + NewOpc = AArch64ISD::ST3post; + NumVecs = 3; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st4: + NewOpc = AArch64ISD::ST4post; + NumVecs = 4; + IsStore = true; + break; + case Intrinsic::aarch64_neon_ld1x2: + NewOpc = AArch64ISD::LD1x2post; + NumVecs = 2; + break; + case Intrinsic::aarch64_neon_ld1x3: + NewOpc = AArch64ISD::LD1x3post; + NumVecs = 3; + break; + case Intrinsic::aarch64_neon_ld1x4: + NewOpc = AArch64ISD::LD1x4post; + NumVecs = 4; + break; + case Intrinsic::aarch64_neon_st1x2: + NewOpc = AArch64ISD::ST1x2post; + NumVecs = 2; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st1x3: + NewOpc = AArch64ISD::ST1x3post; + NumVecs = 3; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st1x4: + NewOpc = AArch64ISD::ST1x4post; + NumVecs = 4; + IsStore = true; + break; + case Intrinsic::aarch64_neon_ld2r: + NewOpc = AArch64ISD::LD2DUPpost; + NumVecs = 2; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld3r: + NewOpc = AArch64ISD::LD3DUPpost; + NumVecs = 3; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld4r: + NewOpc = AArch64ISD::LD4DUPpost; + NumVecs = 4; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld2lane: + NewOpc = AArch64ISD::LD2LANEpost; + NumVecs = 2; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_ld3lane: + NewOpc = AArch64ISD::LD3LANEpost; + NumVecs = 3; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_ld4lane: + NewOpc = AArch64ISD::LD4LANEpost; + NumVecs = 4; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st2lane: + NewOpc = AArch64ISD::ST2LANEpost; + NumVecs = 2; + IsStore = true; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st3lane: + NewOpc = AArch64ISD::ST3LANEpost; + NumVecs = 3; + IsStore = true; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st4lane: + NewOpc = AArch64ISD::ST4LANEpost; + NumVecs = 4; + IsStore = true; + IsLaneOp = true; + break; } EVT VecTy; @@ -18038,14 +18109,14 @@ unsigned n; for (n = 0; n < NumResultVecs; ++n) Tys[n] = VecTy; - Tys[n++] = MVT::i64; // Type of write back register - Tys[n] = MVT::Other; // Type of the chain + Tys[n++] = MVT::i64; // Type of write back register + Tys[n] = MVT::Other; // Type of the chain SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2)); MemIntrinsicSDNode *MemInt = cast(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops, - MemInt->getMemoryVT(), - MemInt->getMemOperand()); + SDValue UpdN = + 
DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops, + MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. std::vector NewResults; @@ -18063,16 +18134,16 @@ // Checks to see if the value is the prescribed width and returns information // about its extension mode. -static -bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) { +static bool checkValueWidth(SDValue V, unsigned width, + ISD::LoadExtType &ExtType) { ExtType = ISD::NON_EXTLOAD; - switch(V.getNode()->getOpcode()) { + switch (V.getNode()->getOpcode()) { default: return false; case ISD::LOAD: { LoadSDNode *LoadNode = cast(V.getNode()); - if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) - || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { + if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) || + (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { ExtType = LoadNode->getExtensionType(); return true; } @@ -18080,8 +18151,8 @@ } case ISD::AssertSext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8 && width == 8) - || (TypeNode->getVT() == MVT::i16 && width == 16)) { + if ((TypeNode->getVT() == MVT::i8 && width == 8) || + (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::SEXTLOAD; return true; } @@ -18089,8 +18160,8 @@ } case ISD::AssertZext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8 && width == 8) - || (TypeNode->getVT() == MVT::i16 && width == 16)) { + if ((TypeNode->getVT() == MVT::i8 && width == 8) || + (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::ZEXTLOAD; return true; } @@ -18181,9 +18252,9 @@ // the whole range we can just adjust the input and avoid writing equations // for sign extended inputs. if (ExtType == ISD::SEXTLOAD) - AddConstant -= (1 << (width-1)); + AddConstant -= (1 << (width - 1)); - switch(CC) { + switch (CC) { case AArch64CC::LE: case AArch64CC::GT: if ((AddConstant == 0) || @@ -18194,22 +18265,20 @@ break; case AArch64CC::LT: case AArch64CC::GE: - if ((AddConstant == 0) || - (AddConstant >= 0 && CompConstant <= 0) || + if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant)) return true; break; case AArch64CC::HI: case AArch64CC::LS: if ((AddConstant >= 0 && CompConstant < 0) || - (AddConstant <= 0 && CompConstant >= -1 && - CompConstant < AddConstant + MaxUInt)) + (AddConstant <= 0 && CompConstant >= -1 && + CompConstant < AddConstant + MaxUInt)) return true; - break; + break; case AArch64CC::PL: case AArch64CC::MI: - if ((AddConstant == 0) || - (AddConstant > 0 && CompConstant <= 0) || + if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) || (AddConstant < 0 && CompConstant <= AddConstant)) return true; break; @@ -18242,11 +18311,10 @@ return false; } -static -SDValue performCONDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, unsigned CCIndex, - unsigned CmpIndex) { +static SDValue performCONDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, unsigned CCIndex, + unsigned CmpIndex) { unsigned CC = cast(N->getOperand(CCIndex))->getSExtValue(); SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); @@ -18296,19 +18364,20 @@ if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) || !checkValueWidth(AddInputValue2, MaskBits, ExtType) || - !checkValueWidth(AddInputValue1, MaskBits, ExtType) ) + !checkValueWidth(AddInputValue1, 
MaskBits, ExtType)) return SDValue(); - if(!isEquivalentMaskless(CC, MaskBits, ExtType, - cast(AddInputValue2.getNode())->getSExtValue(), - cast(SubsInputValue.getNode())->getSExtValue())) + if (!isEquivalentMaskless( + CC, MaskBits, ExtType, + cast(AddInputValue2.getNode())->getSExtValue(), + cast(SubsInputValue.getNode())->getSExtValue())) return SDValue(); // The AND is not necessary, remove it. - SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0), - SubsNode->getValueType(1)); - SDValue Ops[] = { AddValue, SubsNode->getOperand(1) }; + SDVTList VTs = + DAG.getVTList(SubsNode->getValueType(0), SubsNode->getValueType(1)); + SDValue Ops[] = {AddValue, SubsNode->getOperand(1)}; SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops); DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode()); @@ -18482,7 +18551,7 @@ // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 if (SDValue Folded = foldCSELofCTTZ(N, DAG)) - return Folded; + return Folded; return performCONDCombine(N, DCI, DAG, 2, 3); } @@ -18913,8 +18982,7 @@ SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, - IfTrue, IfFalse); + return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, IfTrue, IfFalse); } /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with @@ -18969,17 +19037,15 @@ // First perform a vector comparison, where lane 0 is the one we're interested // in. SDLoc DL(N0); - SDValue LHS = - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0)); - SDValue RHS = - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1)); + SDValue LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0)); + SDValue RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1)); SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2)); // Now duplicate the comparison mask we want across all other lanes. SmallVector DUPMask(CCVT.getVectorNumElements(), 0); SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask); - Mask = DAG.getNode(ISD::BITCAST, DL, - ResVT.changeVectorElementTypeToInteger(), Mask); + Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(), + Mask); return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } @@ -19326,8 +19392,8 @@ // Sign extend of an unsigned unpack -> signed unpack if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) { - unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI - : AArch64ISD::SUNPKLO; + unsigned SOpc = + Opc == AArch64ISD::UUNPKHI ? 
AArch64ISD::SUNPKHI : AArch64ISD::SUNPKLO; // Push the sign extend to the operand of the unpack // This is necessary where, for example, the operand of the unpack @@ -20313,16 +20379,17 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDLoc dl(V.getNode()); SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64); - SDValue VHi = DAG.getAnyExtOrTrunc( - DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)), - dl, MVT::i64); + SDValue VHi = + DAG.getAnyExtOrTrunc(DAG.getNode(ISD::SRL, dl, MVT::i128, V, + DAG.getConstant(64, dl, MVT::i64)), + dl, MVT::i64); if (DAG.getDataLayout().isBigEndian()) - std::swap (VLo, VHi); + std::swap(VLo, VHi); SDValue RegClass = DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32); - const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; + const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1}; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } @@ -20341,8 +20408,8 @@ SDValue Ops[] = { createGPRPairNode(DAG, N->getOperand(2)), // Compare value createGPRPairNode(DAG, N->getOperand(3)), // Store value - N->getOperand(1), // Ptr - N->getOperand(0), // Chain in + N->getOperand(1), // Ptr + N->getOperand(0), // Chain in }; unsigned Opcode; @@ -20494,8 +20561,8 @@ if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) || LoadNode->getMemoryVT() != MVT::i128) { - // Non-volatile or atomic loads are optimized later in AArch64's load/store - // optimizer. + // Non-volatile or atomic loads are optimized later in AArch64's + // load/store optimizer. return; } @@ -20534,30 +20601,30 @@ case Intrinsic::aarch64_sve_clasta_n: { SDLoc DL(N); auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); - auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, - N->getOperand(1), Op2, N->getOperand(3)); + auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, N->getOperand(1), + Op2, N->getOperand(3)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_clastb_n: { SDLoc DL(N); auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); - auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, - N->getOperand(1), Op2, N->getOperand(3)); + auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, N->getOperand(1), + Op2, N->getOperand(3)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_lasta: { SDLoc DL(N); - auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, - N->getOperand(1), N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, N->getOperand(1), + N->getOperand(2)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_lastb: { SDLoc DL(N); - auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, - N->getOperand(1), N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, N->getOperand(1), + N->getOperand(2)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } @@ -20650,7 +20717,8 @@ return AtomicExpansionKind::CmpXChg; unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size > 128) return AtomicExpansionKind::None; + if (Size > 128) + return AtomicExpansionKind::None; // Nand is not supported in LSE. // Leave 128 bits to LLSC or CmpXChg. 
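createGPRPairNode, reformatted above, feeds an i128 value into an XSeqPairs register pair: the low half is the truncation, the high half is the value shifted right by 64, and the two halves are swapped on big-endian targets. A standalone sketch of that split (plain C++17 using the Clang/GCC __int128 extension; splitI128 is a hypothetical name for illustration):

#include <cassert>
#include <cstdint>
#include <utility>

// Split a 128-bit value into (lo, hi) 64-bit halves, swapping for
// big-endian so the pair sits in the order the register sequence expects.
static std::pair<uint64_t, uint64_t> splitI128(unsigned __int128 V,
                                               bool IsBigEndian) {
  uint64_t Lo = static_cast<uint64_t>(V);
  uint64_t Hi = static_cast<uint64_t>(V >> 64);
  if (IsBigEndian)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}

int main() {
  unsigned __int128 V =
      static_cast<unsigned __int128>(0x0123456789ABCDEFULL) << 64 |
      0x1122334455667788ULL;
  auto [Lo, Hi] = splitI128(V, /*IsBigEndian=*/false);
  assert(Lo == 0x1122334455667788ULL && Hi == 0x0123456789ABCDEFULL);
  return 0;
}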
@@ -20732,7 +20800,7 @@ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64"); } - Type *Tys[] = { Addr->getType() }; + Type *Tys[] = {Addr->getType()}; Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys); @@ -20776,11 +20844,12 @@ Intrinsic::ID Int = IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr; - Type *Tys[] = { Addr->getType() }; + Type *Tys[] = {Addr->getType()}; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); const DataLayout &DL = M->getDataLayout(); - IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); + IntegerType *IntValTy = + Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); Val = Builder.CreateBitCast(Val, IntValTy); CallInst *CI = Builder.CreateCall( @@ -20893,7 +20962,7 @@ // may be beneficial to sink in other cases, but we would have to check that // the cmp would not get folded into the br to form a cbz for these to be // beneficial. - ConstantInt* Mask = dyn_cast(AndI.getOperand(1)); + ConstantInt *Mask = dyn_cast(AndI.getOperand(1)); if (!Mask) return false; return Mask->getValue().isPowerOf2(); @@ -20952,9 +21021,9 @@ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction().hasFnAttribute( - Attribute::NoUnwind) && - "Function should be nounwind in insertCopiesSplitCSR!"); + assert( + Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); @@ -21029,9 +21098,7 @@ } // Unlike X86, we let frame lowering assign offsets to all catch objects. -bool AArch64TargetLowering::needsFixedCatchObjects() const { - return false; -} +bool AArch64TargetLowering::needsFixedCatchObjects() const { return false; } bool AArch64TargetLowering::shouldLocalize( const MachineInstr &MI, const TargetTransformInfo *TTI) const { @@ -21372,7 +21439,8 @@ SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32); SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT); - SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2); + SDValue Res = + DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2); if (Negated) Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res); @@ -21416,16 +21484,16 @@ Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo); Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi); Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi); - SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Lo, Op1Lo); - SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Hi, Op1Hi); + SDValue ResultLo = + DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, Op0Lo, Op1Lo); + SDValue ResultHi = + DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, Op0Hi, Op1Hi); // Convert again to scalable vectors to truncate. 
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo); ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi); - SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, - ResultLo, ResultHi); + SDValue ScalableResult = + DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, ResultLo, ResultHi); return convertFromScalableVector(DAG, VT, ScalableResult); } @@ -21576,9 +21644,9 @@ SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((!V.getValueType().isVector() || - V.getValueType().isScalableVector()) && - "Only scalable vectors are supported!"); + assert( + (!V.getValueType().isVector() || V.getValueType().isScalableVector()) && + "Only scalable vectors are supported!"); Operands.push_back(V); } @@ -21619,8 +21687,9 @@ return convertFromScalableVector(DAG, VT, ScalableRes); } -SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, + SelectionDAG &DAG) const { SDLoc DL(ScalarOp); SDValue AccOp = ScalarOp.getOperand(0); SDValue VecOp = ScalarOp.getOperand(1); @@ -21641,14 +21710,15 @@ DAG.getUNDEF(ContainerVT), AccOp, Zero); // Perform reduction. - SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, - Pg, AccOp, VecOp); + SDValue Rdx = + DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, Pg, AccOp, VecOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero); } -SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, + SelectionDAG &DAG) const { SDLoc DL(ReduceOp); SDValue Op = ReduceOp.getOperand(0); EVT OpVT = Op.getValueType(); @@ -21705,16 +21775,16 @@ } // UADDV always returns an i64 result. - EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 : - SrcVT.getVectorElementType(); + EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 + : SrcVT.getVectorElementType(); EVT RdxVT = SrcVT; if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED) RdxVT = getPackedSVEVectorVT(ResVT); SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, - Rdx, DAG.getConstant(0, DL, MVT::i64)); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, + DAG.getConstant(0, DL, MVT::i64)); // The VEC_REDUCE nodes expect an element size result. 
if (ResVT != ScalarOp.getValueType()) @@ -21723,9 +21793,8 @@ return Res; } -SDValue -AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE( + SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -21742,8 +21811,7 @@ Mask = DAG.getNode(ISD::TRUNCATE, DL, MaskContainerVT.changeVectorElementType(MVT::i1), Mask); - auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, - Mask, Op1, Op2); + auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, Mask, Op1, Op2); return convertFromScalableVector(DAG, VT, ScalableRes); } @@ -21832,16 +21900,16 @@ SDValue Pg = getPredicateForVector(DAG, DL, VT); EVT SrcVT = Val.getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - EVT ExtendVT = ContainerVT.changeVectorElementType( - SrcVT.getVectorElementType()); + EVT ExtendVT = + ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val); Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val); Val = getSVESafeBitCast(ExtendVT, Val, DAG); - Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, - Pg, Val, DAG.getUNDEF(ContainerVT)); + Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, Pg, + Val, DAG.getUNDEF(ContainerVT)); return convertFromScalableVector(DAG, VT, Val); } @@ -21856,8 +21924,8 @@ SDValue Val = Op.getOperand(0); EVT SrcVT = Val.getValueType(); EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT); - EVT RoundVT = ContainerSrcVT.changeVectorElementType( - VT.getVectorElementType()); + EVT RoundVT = + ContainerSrcVT.changeVectorElementType(VT.getVectorElementType()); SDValue Pg = getPredicateForVector(DAG, DL, RoundVT); Val = convertToScalableVector(DAG, ContainerSrcVT, Val); @@ -21934,7 +22002,7 @@ if (ContainerSrcVT.getVectorElementType().getSizeInBits() <= ContainerDstVT.getVectorElementType().getSizeInBits()) { EVT CvtVT = ContainerDstVT.changeVectorElementType( - ContainerSrcVT.getVectorElementType()); + ContainerSrcVT.getVectorElementType()); SDValue Pg = getPredicateForVector(DAG, DL, VT); Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); @@ -22088,8 +22156,8 @@ SDLoc DL(Op); EVT InVT = Op.getValueType(); - assert(VT.isScalableVector() && isTypeLegal(VT) && - InVT.isScalableVector() && isTypeLegal(InVT) && + assert(VT.isScalableVector() && isTypeLegal(VT) && InVT.isScalableVector() && + isTypeLegal(InVT) && "Only expect to cast between legal scalable vector types!"); assert(VT.getVectorElementType() != MVT::i1 && InVT.getVectorElementType() != MVT::i1 && diff --git a/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -opaque-pointers < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define void @f_undef(<8 x i64> %a, ptr %dst) { +; CHECK-LABEL: f_undef: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ret +BB: + %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> undef + store <16 x i64> %S, ptr %dst, align 64 + 
ret void +} + +define void @f_poison(<8 x i64> %a, ptr %dst) { +; CHECK-LABEL: f_poison: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ret +BB: + %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> poison + store <16 x i64> %S, ptr %dst, align 64 + ret void +}
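The two tests above exercise the guard added earlier in the interleaved-store lowering: a shufflevector whose mask is undef or poison reaches the lowering as a mask of all -1s, and the code now bails out early instead of hitting the out-of-bounds read described in the new comment. A standalone sketch of the same predicate (plain C++, not LLVM code; kUndefMaskElem and isAllUndefMask are illustrative stand-ins for LLVM's UndefMaskElem sentinel and the llvm::all_of call used in the patch):

#include <algorithm>
#include <cassert>
#include <vector>

// Stand-in for LLVM's UndefMaskElem sentinel used in shuffle masks.
constexpr int kUndefMaskElem = -1;

// True when every mask element is undef, i.e. there is no real index to
// drive an interleaved store, so the transformation must be skipped.
static bool isAllUndefMask(const std::vector<int> &Mask) {
  return std::all_of(Mask.begin(), Mask.end(),
                     [](int Idx) { return Idx == kUndefMaskElem; });
}

int main() {
  assert(isAllUndefMask({-1, -1, -1, -1}));  // undef/poison mask: skip
  assert(!isAllUndefMask({0, 8, 1, 9}));     // a real interleave mask: proceed
  return 0;
}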