Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -196,11 +196,12 @@
 public:
   /// \name Scalar TTI Implementations
   /// @{
-  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
-                                      unsigned BitWidth, unsigned AddressSpace,
-                                      unsigned Alignment, bool *Fast) const {
+  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+                                      unsigned AddressSpace, unsigned Alignment,
+                                      bool *Fast) const {
     EVT E = EVT::getIntegerVT(Context, BitWidth);
-    return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
+    return getTLI()->allowsMisalignedMemoryAccesses(
+        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
   }
 
   bool hasBranchDivergence() { return false; }
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -1415,10 +1415,10 @@
   /// copy/move/set is converted to a sequence of store operations. Its use
   /// helps to ensure that such replacements don't generate code that causes an
   /// alignment error (trap) on the target machine.
-  virtual bool allowsMisalignedMemoryAccesses(EVT,
-                                              unsigned AddrSpace = 0,
-                                              unsigned Align = 1,
-                                              bool * /*Fast*/ = nullptr) const {
+  virtual bool allowsMisalignedMemoryAccesses(
+      EVT, unsigned AddrSpace = 0, unsigned Align = 1,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool * /*Fast*/ = nullptr) const {
     return false;
   }
 
@@ -1426,8 +1426,18 @@
   /// given address space and alignment. If the access is allowed, the optional
   /// final parameter returns if the access is also fast (as defined by the
   /// target).
+  bool
+  allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+                     unsigned AddrSpace = 0, unsigned Alignment = 1,
+                     MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+                     bool *Fast = nullptr) const;
+
+  /// Return true if the target supports a memory access of this type for the
+  /// given MachineMemOperand. If the access is allowed, the optional
+  /// final parameter returns if the access is also fast (as defined by the
+  /// target).
   bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
-                          unsigned AddrSpace = 0, unsigned Alignment = 1,
+                          const MachineMemOperand &MMO,
                           bool *Fast = nullptr) const;
 
   /// Returns the target specific optimal type for load and store operations as
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4607,7 +4607,8 @@
   // Ensure that this isn't going to produce an unsupported unaligned access.
   if (ShAmt &&
       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                              LDST->getAddressSpace(), ShAmt / 8))
+                              LDST->getAddressSpace(), ShAmt / 8,
+                              LDST->getMemOperand()->getFlags()))
     return false;
 
   // It's not possible to generate a constant of extended or untyped type.
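As a quick illustration of the new TargetLowering API above (not something this patch itself adds), a DAG-combine-style call site can now hand the whole MachineMemOperand to allowsMemoryAccess instead of passing the address space and alignment separately, so MachineMemOperand::Flags such as MOVolatile reach the target hook. The helper below is a minimal sketch; the function name canUseWideAccess is invented for the example.

// Sketch only: mirrors the call-site pattern in the DAGCombiner hunks below.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static bool canUseWideAccess(const TargetLowering &TLI, SelectionDAG &DAG,
                             EVT WideVT, LSBaseSDNode *LDST) {
  bool Fast = false;
  // Old form: TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
  //                                  WideVT, LDST->getAddressSpace(),
  //                                  LDST->getAlignment(), &Fast);
  // New form: pass the MachineMemOperand itself; the base class forwards its
  // address space, alignment, and flags to allowsMisalignedMemoryAccesses.
  return TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), WideVT,
                                *LDST->getMemOperand(), &Fast) &&
         Fast;
}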
@@ -6408,9 +6409,9 @@
 
   // Check that a store of the wide type is both allowed and fast on the target
   bool Fast = false;
-  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstStore->getAddressSpace(),
-                                        FirstStore->getAlignment(), &Fast);
+  bool Allowed =
+      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+                             *FirstStore->getMemOperand(), &Fast);
   if (!Allowed || !Fast)
     return SDValue();
 
@@ -6573,8 +6574,7 @@
   // Check that a load of the wide type is both allowed and fast on the target
   bool Fast = false;
   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstLoad->getAddressSpace(),
-                                        FirstLoad->getAlignment(), &Fast);
+                                        VT, *FirstLoad->getMemOperand(), &Fast);
   if (!Allowed || !Fast)
     return SDValue();
 
@@ -10797,15 +10797,14 @@
        TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned OrigAlign = LN0->getAlignment();
 
     bool Fast = false;
     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
+                               *LN0->getMemOperand(), &Fast) &&
         Fast) {
       SDValue Load =
           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
-                      LN0->getPointerInfo(), OrigAlign,
+                      LN0->getPointerInfo(), LN0->getAlignment(),
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
       return Load;
@@ -15408,8 +15407,8 @@
       if (TLI.isTypeLegal(StoreTy) &&
           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                 FirstStoreAlign, &IsFast) &&
+          TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast) {
        LastIntegerTrunc = false;
        LastLegalType = i + 1;
@@ -15420,8 +15419,9 @@
            TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign, &IsFast) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(),
+                                   &IsFast) &&
            IsFast) {
          LastIntegerTrunc = true;
          LastLegalType = i + 1;
@@ -15439,8 +15439,8 @@
        EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
        if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                   FirstStoreAlign, &IsFast) &&
+            TLI.allowsMemoryAccess(
+                Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast)
          LastLegalVectorType = i + 1;
      }
@@ -15511,8 +15511,8 @@
 
      if (TLI.isTypeLegal(Ty) &&
          TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-          TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                 FirstStoreAlign, &IsFast) &&
+          TLI.allowsMemoryAccess(Context, DL, Ty,
+                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast)
        NumStoresToMerge = i + 1;
    }
@@ -15603,7 +15603,6 @@
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
-    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
    unsigned FirstLoadAlign = FirstLoad->getAlignment();
 
    // Scan the memory operations on the chain and find the first
@@ -15643,11 +15642,11 @@
        bool IsFastSt, IsFastLd;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign,
&IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } @@ -15657,11 +15656,11 @@ StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = false; @@ -15676,11 +15675,12 @@ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), + &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = true; @@ -15931,13 +15931,12 @@ if (((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { - unsigned OrigAlign = ST->getAlignment(); bool Fast = false; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, - ST->getAddressSpace(), OrigAlign, &Fast) && + *ST->getMemOperand(), &Fast) && Fast) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), OrigAlign, + ST->getPointerInfo(), ST->getAlignment(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); } } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -492,10 +492,9 @@ // If this is an unaligned store and the target doesn't support it, // expand it. EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -607,11 +606,10 @@ default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -668,13 +666,12 @@ default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; } @@ -860,10 +857,9 @@ // If this is an unaligned load and the target doesn't support it, // expand it. EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); } } Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -238,7 +238,8 @@ // issuing a (or a pair of) unaligned and overlapping load / store. bool Fast; if (NumMemOps && AllowOverlap && NewVTSize < Size && - allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && + allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, + MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; else { Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -1464,6 +1464,7 @@ const DataLayout &DL, EVT VT, unsigned AddrSpace, unsigned Alignment, + MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution @@ -1479,7 +1480,15 @@ } // This is a misaligned access. - return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast) const { + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), Fast); } BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -262,9 +262,10 @@ /// Returns true if the target allows unaligned memory accesses of the /// specified type. 
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, - unsigned Align = 1, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace = 0, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; /// Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1074,10 +1074,9 @@ return MVT::i64; } -bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *Fast) const { +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast) const { if (Subtarget->requiresStrictAlign()) return false; @@ -2843,7 +2842,8 @@ unsigned AS = StoreNode->getAddressSpace(); unsigned Align = StoreNode->getAlignment(); if (Align < MemVT.getStoreSize() && - !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { + !allowsMisalignedMemoryAccesses( + MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) { return scalarizeVectorStore(StoreNode, DAG); } @@ -8716,7 +8716,9 @@ if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) return true; bool Fast; - return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast; + return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, + &Fast) && + Fast; }; if (CanUseNEON && IsMemset && !IsSmallMemset && Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2965,7 +2965,8 @@ // Expand unaligned loads earlier than legalization. Due to visitation order // problems during legalization, the emitted instructions to pack and unpack // the bytes again are not eliminated in the case of an unaligned copy. - if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (!allowsMisalignedMemoryAccesses( + VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return scalarizeVectorLoad(LN, DAG); @@ -3017,7 +3018,8 @@ // order problems during legalization, the emitted instructions to pack and // unpack the bytes again are not eliminated in the case of an unaligned // copy. 
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (!allowsMisalignedMemoryAccesses( + VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return scalarizeVectorStore(SN, DAG); Index: lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.h +++ lib/Target/AMDGPU/R600ISelLowering.h @@ -49,9 +49,10 @@ bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, - unsigned Align, - bool *IsFast) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const override; private: unsigned Gen; Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1261,7 +1261,8 @@ unsigned Align = StoreNode->getAlignment(); if (Align < MemVT.getStoreSize() && - !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { + !allowsMisalignedMemoryAccesses( + MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) { return expandUnalignedStore(StoreNode, DAG); } @@ -1663,10 +1664,9 @@ return true; } -bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *IsFast) const { +bool R600TargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { if (IsFast) *IsFast = false; Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -233,9 +233,10 @@ bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, - unsigned Align, - bool *IsFast) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1157,10 +1157,9 @@ return true; } -bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { if (IsFast) *IsFast = false; @@ -6756,14 +6755,15 @@ assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - unsigned Alignment = Load->getAlignment(); - unsigned AS = Load->getAddressSpace(); if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - AS, Alignment)) { + *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, DL); } + + unsigned Alignment = Load->getAlignment(); + unsigned AS = Load->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == 
AMDGPUAS::FLAT_ADDRESS && Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) { @@ -7224,12 +7224,12 @@ assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - unsigned AS = Store->getAddressSpace(); if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - AS, Store->getAlignment())) { + *Store->getMemOperand())) { return expandUnalignedStore(Store, DAG); } + unsigned AS = Store->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) { Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -321,6 +321,7 @@ /// is "fast" by reference in the second argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -13043,9 +13043,9 @@ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, + MachineMemOperand::Flags, bool *Fast) const { // Depends what it gets converted into if the type is weird. if (!VT.isSimple()) @@ -13099,11 +13099,14 @@ bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, + MachineMemOperand::MONone, &Fast) && + Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + (allowsMisalignedMemoryAccesses( + MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) && Fast))) { return MVT::f64; } Index: lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.h +++ lib/Target/Hexagon/HexagonISelLowering.h @@ -298,7 +298,7 @@ const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Align, bool *Fast) const override; + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; /// Returns relocation base for the given PIC jumptable. 
   SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3065,8 +3065,9 @@
   return MVT::Other;
 }
 
-bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-      unsigned AS, unsigned Align, bool *Fast) const {
+bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
   if (Fast)
     *Fast = false;
   return Subtarget.isHVXVectorType(VT.getSimpleVT());
Index: lib/Target/Mips/Mips16ISelLowering.h
===================================================================
--- lib/Target/Mips/Mips16ISelLowering.h
+++ lib/Target/Mips/Mips16ISelLowering.h
@@ -23,6 +23,7 @@
 
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                         unsigned Align,
+                                        MachineMemOperand::Flags Flags,
                                         bool *Fast) const override;
 
     MachineBasicBlock *
Index: lib/Target/Mips/Mips16ISelLowering.cpp
===================================================================
--- lib/Target/Mips/Mips16ISelLowering.cpp
+++ lib/Target/Mips/Mips16ISelLowering.cpp
@@ -155,11 +155,8 @@
   return new Mips16TargetLowering(TM, STI);
 }
 
-bool
-Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   return false;
 }
 
Index: lib/Target/Mips/MipsSEISelLowering.h
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.h
+++ lib/Target/Mips/MipsSEISelLowering.h
@@ -40,9 +40,10 @@
     void addMSAFloatType(MVT::SimpleValueType Ty,
                          const TargetRegisterClass *RC);
 
-    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AS = 0, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
Index: lib/Target/Mips/MipsSEISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.cpp
+++ lib/Target/Mips/MipsSEISelLowering.cpp
@@ -419,11 +419,8 @@
                       Op->getOperand(2));
 }
 
-bool
-MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
   if (Subtarget.systemSupportsUnalignedAccess()) {
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2231,7 +2231,7 @@
   LoadSDNode *Load = cast<LoadSDNode>(Op);
   EVT MemVT = Load->getMemoryVT();
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                          Load->getAddressSpace(), Load->getAlignment())) {
+                          *Load->getMemOperand())) {
     SDValue Ops[2];
     std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
     return DAG.getMergeValues(Ops, SDLoc(Op));
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h @@ -846,10 +846,10 @@ /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - bool allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align = 1, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -14566,6 +14566,7 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, + MachineMemOperand::Flags, bool *Fast) const { if (DisablePPCUnaligned) return false; Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -409,6 +409,7 @@ Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -761,10 +761,8 @@ return isUInt<32>(Imm) || isUInt<32>(-Imm); } -bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, - bool *Fast) const { +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -60,6 +60,7 @@ unsigned AS, Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -530,7 +530,8 @@ } bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( - EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const { + EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, + MachineMemOperand::Flags /*Flags*/, bool *Fast) const { // WebAssembly supports unaligned accesses, though it should be declared // with the p2align attribute on loads and stores which do so, and there // may be a performance impact. 
We tell LLVM they're "fast" because Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -745,7 +745,8 @@ /// Returns true if the target allows unaligned memory accesses of the /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, - bool *Fast) const override; + MachineMemOperand::Flags Flags, + bool *Fast) const override; /// Provide custom lowering hooks for some operations. /// Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -178,10 +178,10 @@ // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i8 , Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i32, MVT::i16, Expand); - setTruncStoreAction(MVT::i32, MVT::i8 , Expand); - setTruncStoreAction(MVT::i16, MVT::i8, Expand); + setTruncStoreAction(MVT::i32, MVT::i8, Expand); + setTruncStoreAction(MVT::i16, MVT::i8, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -195,96 +195,96 @@ // Integer absolute. if (Subtarget.hasCMov()) { - setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); + setOperationAction(ISD::ABS, MVT::i16, Custom); + setOperationAction(ISD::ABS, MVT::i32, Custom); } - setOperationAction(ISD::ABS , MVT::i64 , Custom); + setOperationAction(ISD::ABS, MVT::i64, Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { - setOperationAction(ShiftOp , MVT::i16 , Custom); - setOperationAction(ShiftOp , MVT::i32 , Custom); + setOperationAction(ShiftOp, MVT::i16, Custom); + setOperationAction(ShiftOp, MVT::i32, Custom); if (Subtarget.is64Bit()) - setOperationAction(ShiftOp , MVT::i64 , Custom); + setOperationAction(ShiftOp, MVT::i64, Custom); } // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. - setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); if (Subtarget.is64Bit()) { if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) // f32/f64 are legal, f80 is custom. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); else - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } else if (!Subtarget.useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD or VCVTUSI2SS/SD for other targets. 
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } else { - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have // this operation. - setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); - setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); if (!Subtarget.useSoftFloat()) { // SSE has no i16 to fp conversion, only i32. if (X86ScalarSSEf32) { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } } else { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); } // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. - setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); - setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); if (!Subtarget.useSoftFloat()) { // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 // are Legal, f80 is custom lowered. - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed // conversion. - setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); if (Subtarget.is64Bit()) { if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. 
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } else { - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); } } else if (!Subtarget.useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. @@ -292,27 +292,27 @@ // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); else // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { - setOperationAction(ISD::BITCAST , MVT::f32 , Expand); - setOperationAction(ISD::BITCAST , MVT::i32 , Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); if (Subtarget.is64Bit()) { - setOperationAction(ISD::BITCAST , MVT::f64 , Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); // Without SSE, i64->f64 goes through memory. - setOperationAction(ISD::BITCAST , MVT::i64 , Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); } } else if (!Subtarget.is64Bit()) - setOperationAction(ISD::BITCAST , MVT::i64 , Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); // Scalar integer divide and remainder are lowered to use operations that // produce two results, to match the available instructions. This exposes @@ -324,7 +324,7 @@ // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. 
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); @@ -333,36 +333,36 @@ setOperationAction(ISD::UREM, VT, Expand); } - setOperationAction(ISD::BR_JT , MVT::Other, Expand); - setOperationAction(ISD::BRCOND , MVT::Other, Custom); - for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, - MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { - setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128, MVT::i8, MVT::i16, + MVT::i32, MVT::i64}) { + setOperationAction(ISD::BR_CC, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); } if (Subtarget.is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); - - setOperationAction(ISD::FREM , MVT::f32 , Expand); - setOperationAction(ISD::FREM , MVT::f64 , Expand); - setOperationAction(ISD::FREM , MVT::f80 , Expand); - setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f80, Expand); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // Promote the i8 variants and force them on up to i32 which has a shorter // encoding. - setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); - setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32); + setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32); if (!Subtarget.hasBMI()) { - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); + setOperationAction(ISD::CTTZ, MVT::i16, Custom); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal); if (Subtarget.is64Bit()) { - setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); } } @@ -370,17 +370,17 @@ if (Subtarget.hasLZCNT()) { // When promoting the i8 variants, force them to i32 for a shorter // encoding. 
- setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); - setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32); + setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32); } else { - setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i32 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); + setOperationAction(ISD::CTLZ, MVT::i8, Custom); + setOperationAction(ISD::CTLZ, MVT::i16, Custom); + setOperationAction(ISD::CTLZ, MVT::i32, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); if (Subtarget.is64Bit()) { - setOperationAction(ISD::CTLZ , MVT::i64 , Custom); + setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); } } @@ -409,39 +409,39 @@ if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (Subtarget.is64Bit()) - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); else - setOperationAction(ISD::CTPOP , MVT::i64 , Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); } - setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); if (!Subtarget.hasMOVBE()) - setOperationAction(ISD::BSWAP , MVT::i16 , Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); // These should be promoted to a larger select which is supported. - setOperationAction(ISD::SELECT , MVT::i1 , Promote); + setOperationAction(ISD::SELECT, MVT::i1, Promote); // X86 wants to expand cmov itself. - for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { + for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) { setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); } - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); } // Custom action for SELECT MMX and expand action for SELECT_CC MMX setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); - setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since // LLVM/Clang supports zero-cost DWARF and SEH exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); @@ -451,19 +451,19 @@ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); // Darwin ABI issue. 
- for (auto VT : { MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; - setOperationAction(ISD::ConstantPool , VT, Custom); - setOperationAction(ISD::JumpTable , VT, Custom); - setOperationAction(ISD::GlobalAddress , VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::GlobalTLSAddress, VT, Custom); - setOperationAction(ISD::ExternalSymbol , VT, Custom); - setOperationAction(ISD::BlockAddress , VT, Custom); + setOperationAction(ISD::ExternalSymbol, VT, Custom); + setOperationAction(ISD::BlockAddress, VT, Custom); } // 64-bit shl, sra, srl (iff 32-bit x86) - for (auto VT : { MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; setOperationAction(ISD::SHL_PARTS, VT, Custom); @@ -472,12 +472,12 @@ } if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) - setOperationAction(ISD::PREFETCH , MVT::Other, Legal); + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); - setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Expand certain atomics - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); @@ -511,14 +511,14 @@ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // VASTART needs to be custom lowered to use the VarArgsFrameIndex - setOperationAction(ISD::VASTART , MVT::Other, Custom); - setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); bool Is64Bit = Subtarget.is64Bit(); - setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); + setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); @@ -540,7 +540,7 @@ // non-optsize case. setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - for (auto VT : { MVT::f32, MVT::f64 }) { + for (auto VT : {MVT::f32, MVT::f64}) { // Use ANDPD to simulate FABS. setOperationAction(ISD::FABS, VT, Custom); @@ -555,8 +555,8 @@ setOperationAction(ISD::FSUB, VT, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , VT, Expand); - setOperationAction(ISD::FCOS , VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); } @@ -572,10 +572,10 @@ addRegisterClass(MVT::f64, &X86::RFP64RegClass); // Use ANDPS to simulate FABS. - setOperationAction(ISD::FABS , MVT::f32, Custom); + setOperationAction(ISD::FABS, MVT::f32, Custom); // Use XORP to simulate FNEG. 
- setOperationAction(ISD::FNEG , MVT::f32, Custom); + setOperationAction(ISD::FNEG, MVT::f32, Custom); if (UseX87) setOperationAction(ISD::UNDEF, MVT::f64, Expand); @@ -586,8 +586,8 @@ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); if (UseX87) { @@ -602,13 +602,13 @@ addRegisterClass(MVT::f64, &X86::RFP64RegClass); addRegisterClass(MVT::f32, &X86::RFP32RegClass); - for (auto VT : { MVT::f32, MVT::f64 }) { - setOperationAction(ISD::UNDEF, VT, Expand); + for (auto VT : {MVT::f32, MVT::f64}) { + setOperationAction(ISD::UNDEF, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); // Always expand sin/cos functions even though x87 has an instruction. - setOperationAction(ISD::FSIN , VT, Expand); - setOperationAction(ISD::FCOS , VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); } } @@ -620,7 +620,7 @@ addLegalFPImmediate(APFloat(+1.0f)); // FLD1 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS - } else // SSE immediates. + } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0f)); // xorps } // Expand FP64 immediates into loads from the stack, save special cases. @@ -630,7 +630,7 @@ addLegalFPImmediate(APFloat(+1.0)); // FLD1 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - } else // SSE immediates. + } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0)); // xorpd } @@ -644,38 +644,38 @@ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); - setOperationAction(ISD::FABS , MVT::f128, Custom); - setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); } addRegisterClass(MVT::f80, &X86::RFP80RegClass); - setOperationAction(ISD::UNDEF, MVT::f80, Expand); + setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); - addLegalFPImmediate(TmpFlt); // FLD0 + addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); - addLegalFPImmediate(TmpFlt); // FLD0/FCHS + addLegalFPImmediate(TmpFlt); // FLD0/FCHS bool ignored; APFloat TmpFlt2(+1.0); - TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, - &ignored); - addLegalFPImmediate(TmpFlt2); // FLD1 + TmpFlt2.convert(APFloat::x87DoubleExtended(), + APFloat::rmNearestTiesToEven, &ignored); + addLegalFPImmediate(TmpFlt2); // FLD1 TmpFlt2.changeSign(); - addLegalFPImmediate(TmpFlt2); // FLD1/FCHS + addLegalFPImmediate(TmpFlt2); // FLD1/FCHS } // Always expand sin/cos functions even though x87 has an instruction. 
- setOperationAction(ISD::FSIN , MVT::f80, Expand); - setOperationAction(ISD::FCOS , MVT::f80, Expand); + setOperationAction(ISD::FSIN, MVT::f80, Expand); + setOperationAction(ISD::FCOS, MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); setOperationAction(ISD::FFLOOR, MVT::f80, Expand); - setOperationAction(ISD::FCEIL, MVT::f80, Expand); + setOperationAction(ISD::FCEIL, MVT::f80, Expand); setOperationAction(ISD::FTRUNC, MVT::f80, Expand); - setOperationAction(ISD::FRINT, MVT::f80, Expand); + setOperationAction(ISD::FRINT, MVT::f80, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); setOperationAction(ISD::LROUND, MVT::f80, Expand); @@ -685,9 +685,9 @@ } // Always use a library call for pow. - setOperationAction(ISD::FPOW , MVT::f32 , Expand); - setOperationAction(ISD::FPOW , MVT::f64 , Expand); - setOperationAction(ISD::FPOW , MVT::f80 , Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f80, Expand); setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); @@ -698,19 +698,19 @@ setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); // Some FP actions are always expanded for vector types. - for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, - MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { - setOperationAction(ISD::FSIN, VT, Expand); - setOperationAction(ISD::FSINCOS, VT, Expand); - setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + for (auto VT : {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v2f64, MVT::v4f64, + MVT::v8f64}) { + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); - setOperationAction(ISD::FPOW, VT, Expand); - setOperationAction(ISD::FLOG, VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FEXP, VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); } // First set operation action for all vector types to either promote @@ -721,11 +721,11 @@ setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); - setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); - setOperationAction(ISD::FMA, VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -748,7 +748,7 @@ setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::SINT_TO_FP, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); + 
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); @@ -785,15 +785,15 @@ addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); - setOperationAction(ISD::FNEG, MVT::v4f32, Custom); - setOperationAction(ISD::FABS, MVT::v4f32, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); + setOperationAction(ISD::FNEG, MVT::v4f32, Custom); + setOperationAction(ISD::FABS, MVT::v4f32, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT, MVT::v4f32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -811,59 +811,59 @@ addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); - for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { + for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, + MVT::v2i32}) { setOperationAction(ISD::SDIV, VT, Custom); setOperationAction(ISD::SREM, VT, Custom); setOperationAction(ISD::UDIV, VT, Custom); setOperationAction(ISD::UREM, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v2i8, Custom); - setOperationAction(ISD::MUL, MVT::v2i16, Custom); - setOperationAction(ISD::MUL, MVT::v2i32, Custom); - setOperationAction(ISD::MUL, MVT::v4i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i16, Custom); - setOperationAction(ISD::MUL, MVT::v8i8, Custom); - - setOperationAction(ISD::MUL, MVT::v16i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i32, Custom); - setOperationAction(ISD::MUL, MVT::v2i64, Custom); - setOperationAction(ISD::MULHU, MVT::v4i32, Custom); - setOperationAction(ISD::MULHS, MVT::v4i32, Custom); - setOperationAction(ISD::MULHU, MVT::v16i8, Custom); - setOperationAction(ISD::MULHS, MVT::v16i8, Custom); - setOperationAction(ISD::MULHU, MVT::v8i16, Legal); - setOperationAction(ISD::MULHS, MVT::v8i16, Legal); - setOperationAction(ISD::MUL, MVT::v8i16, Legal); - setOperationAction(ISD::FNEG, MVT::v2f64, Custom); - setOperationAction(ISD::FABS, MVT::v2f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); + setOperationAction(ISD::MUL, MVT::v2i8, Custom); + setOperationAction(ISD::MUL, MVT::v2i16, Custom); + setOperationAction(ISD::MUL, MVT::v2i32, Custom); + setOperationAction(ISD::MUL, MVT::v4i8, Custom); + setOperationAction(ISD::MUL, MVT::v4i16, Custom); + setOperationAction(ISD::MUL, MVT::v8i8, Custom); + + setOperationAction(ISD::MUL, MVT::v16i8, Custom); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::MULHU, MVT::v4i32, Custom); + setOperationAction(ISD::MULHS, MVT::v4i32, Custom); + setOperationAction(ISD::MULHU, MVT::v16i8, Custom); + setOperationAction(ISD::MULHS, MVT::v16i8, Custom); + 
setOperationAction(ISD::MULHU, MVT::v8i16, Legal); + setOperationAction(ISD::MULHS, MVT::v8i16, Legal); + setOperationAction(ISD::MUL, MVT::v8i16, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + setOperationAction(ISD::FABS, MVT::v2f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); } - setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); - setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); - setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); - setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); - setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); - setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); - setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); - setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); - setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); - setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); - setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); - setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); + setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); + setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); + setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); + setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); if (!ExperimentalVectorWideningLegalization) { // Use widening instead of promotion. - for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16 }) { + for (auto VT : + {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16}) { setOperationAction(ISD::UADDSAT, VT, Custom); setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::USUBSAT, VT, Custom); @@ -871,21 +871,21 @@ } } - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // Provide custom widening for v2f32 setcc. This is really for VLX when // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to // type legalization changing the result type to v4i1 during widening. // It works fine for SSE2 and is probably faster so no need to qualify with // VLX support. 
- setOperationAction(ISD::SETCC, MVT::v2i32, Custom); + setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ABS, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::ABS, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -893,11 +893,11 @@ setCondCodeAction(ISD::SETLE, VT, Custom); } - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) { + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } @@ -914,40 +914,40 @@ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom); } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); + for (auto VT : {MVT::v2f64, MVT::v2i64}) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); if (VT == MVT::v2i64 && !Subtarget.is64Bit()) continue; - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } // Custom lower v2i64 and v2f64 selects. - setOperationAction(ISD::SELECT, MVT::v2f64, Custom); - setOperationAction(ISD::SELECT, MVT::v2i64, Custom); - setOperationAction(ISD::SELECT, MVT::v4i32, Custom); - setOperationAction(ISD::SELECT, MVT::v8i16, Custom); - setOperationAction(ISD::SELECT, MVT::v16i8, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + setOperationAction(ISD::SELECT, MVT::v2f64, Custom); + setOperationAction(ISD::SELECT, MVT::v2i64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT, MVT::v8i16, Custom); + setOperationAction(ISD::SELECT, MVT::v16i8, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); // Custom legalize these to avoid over promotion or custom promotion. 
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is @@ -955,34 +955,34 @@ // be emitted instead of an AssertZExt. This will allow packssdw followed by // packuswb to be used to truncate to v8i8. This is necessary since packusdw // isn't available until sse4.1. - setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion. - setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); // We want to legalize this to an f64 load rather than an i64 load on // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for // store. 
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i16, Custom); - setOperationAction(ISD::LOAD, MVT::v8i8, Custom); - setOperationAction(ISD::STORE, MVT::v2f32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); - setOperationAction(ISD::STORE, MVT::v4i16, Custom); - setOperationAction(ISD::STORE, MVT::v8i8, Custom); - - setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); - setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); - setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); + setOperationAction(ISD::LOAD, MVT::v2f32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + setOperationAction(ISD::LOAD, MVT::v8i8, Custom); + setOperationAction(ISD::STORE, MVT::v2f32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i16, Custom); + setOperationAction(ISD::STORE, MVT::v8i8, Custom); + + setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); + setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); + setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); @@ -993,77 +993,77 @@ if (ExperimentalVectorWideningLegalization) { setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); } else { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom); } // In the customized shift lowering, the legal v4i32/v2i64 cases // in AVX2 will be recognized. - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); } - setOperationAction(ISD::ROTL, MVT::v4i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i16, Custom); + setOperationAction(ISD::ROTL, MVT::v4i32, Custom); + setOperationAction(ISD::ROTL, MVT::v8i16, Custom); // With AVX512, expanding (and promoting the shifts) is better. 
if (!Subtarget.hasAVX512()) - setOperationAction(ISD::ROTL, MVT::v16i8, Custom); + setOperationAction(ISD::ROTL, MVT::v16i8, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { - setOperationAction(ISD::ABS, MVT::v16i8, Legal); - setOperationAction(ISD::ABS, MVT::v8i16, Legal); - setOperationAction(ISD::ABS, MVT::v4i32, Legal); - setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); - setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); - setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); - setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); - setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v4i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); // These might be better off as horizontal vector ops. - setOperationAction(ISD::ADD, MVT::i16, Custom); - setOperationAction(ISD::ADD, MVT::i32, Custom); - setOperationAction(ISD::SUB, MVT::i16, Custom); - setOperationAction(ISD::SUB, MVT::i32, Custom); + setOperationAction(ISD::ADD, MVT::i16, Custom); + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i16, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, RoundedTy, Legal); - setOperationAction(ISD::FCEIL, RoundedTy, Legal); - setOperationAction(ISD::FTRUNC, RoundedTy, Legal); - setOperationAction(ISD::FRINT, RoundedTy, Legal); - setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); + setOperationAction(ISD::FFLOOR, RoundedTy, Legal); + setOperationAction(ISD::FCEIL, RoundedTy, Legal); + setOperationAction(ISD::FTRUNC, RoundedTy, Legal); + setOperationAction(ISD::FRINT, RoundedTy, Legal); + setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); } - setOperationAction(ISD::SMAX, MVT::v16i8, Legal); - setOperationAction(ISD::SMAX, MVT::v4i32, Legal); - setOperationAction(ISD::UMAX, MVT::v8i16, Legal); - setOperationAction(ISD::UMAX, MVT::v4i32, Legal); - setOperationAction(ISD::SMIN, MVT::v16i8, Legal); - setOperationAction(ISD::SMIN, MVT::v4i32, Legal); - setOperationAction(ISD::UMIN, MVT::v8i16, Legal); - setOperationAction(ISD::UMIN, MVT::v4i32, Legal); + setOperationAction(ISD::SMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMAX, MVT::v4i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v4i32, Legal); + setOperationAction(ISD::SMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v4i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v4i32, Legal); // FIXME: Do we need to handle scalar-to-vector here? - setOperationAction(ISD::MUL, MVT::v4i32, Legal); + setOperationAction(ISD::MUL, MVT::v4i32, Legal); // We directly match byte blends in the backend as they match the VSELECT // condition form. - setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. 
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); } @@ -1079,12 +1079,12 @@ } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X - for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { - setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); + for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); if (!ExperimentalVectorWideningLegalization) - setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); @@ -1092,105 +1092,105 @@ // i8 vectors are custom because the source register and source // source memory operand types are not the same width. - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8, + MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::ROTL, VT, Custom); // XOP can efficiently perform BITREVERSE with VPPERM. - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) setOperationAction(ISD::BITREVERSE, VT, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8, + MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::BITREVERSE, VT, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { bool HasInt256 = Subtarget.hasInt256(); - addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); + addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); - addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - - for (auto VT : { MVT::v8f32, MVT::v4f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); + addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? 
&X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + + for (auto VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FCOPYSIGN, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FCOPYSIGN, VT, Custom); } // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); // In the customized shift lowering, the legal v8i32/v4i64 cases // in AVX2 will be recognized. - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); } // These types need custom splitting if their input is a 128-bit vector. - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i32, Custom); - setOperationAction(ISD::ROTL, MVT::v16i16, Custom); + setOperationAction(ISD::ROTL, MVT::v8i32, Custom); + setOperationAction(ISD::ROTL, MVT::v16i16, Custom); // With BWI, expanding (and promoting the shifts) is the better. 
if (!Subtarget.hasBWI()) - setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + + setOperationAction(ISD::SELECT, MVT::v4f64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i64, Custom); + setOperationAction(ISD::SELECT, MVT::v8i32, Custom); + setOperationAction(ISD::SELECT, MVT::v16i16, Custom); + setOperationAction(ISD::SELECT, MVT::v32i8, Custom); + setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + + for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); - setOperationAction(ISD::SELECT, MVT::v4i64, Custom); - setOperationAction(ISD::SELECT, MVT::v8i32, Custom); - setOperationAction(ISD::SELECT, MVT::v16i16, Custom); - setOperationAction(ISD::SELECT, MVT::v32i8, Custom); - setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - - for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SIGN_EXTEND, VT, Custom); - setOperationAction(ISD::ZERO_EXTEND, VT, Custom); - setOperationAction(ISD::ANY_EXTEND, VT, Custom); - } - - setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); - setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); - - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); + + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1199,45 +1199,45 @@ } if (Subtarget.hasAnyFMA()) { - for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, - MVT::v2f64, MVT::v4f64 }) + for (auto VT : + {MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) setOperationAction(ISD::FMA, VT, Legal); } - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); } - setOperationAction(ISD::MUL, MVT::v4i64, Custom); - setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MUL, MVT::v32i8, Custom); - - setOperationAction(ISD::MULHU, MVT::v8i32, Custom); - setOperationAction(ISD::MULHS, MVT::v8i32, Custom); - setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? 
Legal : Custom); - setOperationAction(ISD::MULHU, MVT::v32i8, Custom); - setOperationAction(ISD::MULHS, MVT::v32i8, Custom); - - setOperationAction(ISD::ABS, MVT::v4i64, Custom); - setOperationAction(ISD::SMAX, MVT::v4i64, Custom); - setOperationAction(ISD::UMAX, MVT::v4i64, Custom); - setOperationAction(ISD::SMIN, MVT::v4i64, Custom); - setOperationAction(ISD::UMIN, MVT::v4i64, Custom); - - setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v32i8, Custom); + + setOperationAction(ISD::MULHU, MVT::v8i32, Custom); + setOperationAction(ISD::MULHS, MVT::v8i32, Custom); + setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MULHU, MVT::v32i8, Custom); + setOperationAction(ISD::MULHS, MVT::v32i8, Custom); + + setOperationAction(ISD::ABS, MVT::v4i64, Custom); + setOperationAction(ISD::SMAX, MVT::v4i64, Custom); + setOperationAction(ISD::UMAX, MVT::v4i64, Custom); + setOperationAction(ISD::SMIN, MVT::v4i64, Custom); + setOperationAction(ISD::UMIN, MVT::v4i64, Custom); + + setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { - setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) { + setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, HasInt256 ? 
Legal : Custom); @@ -1255,54 +1255,54 @@ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X - for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) { setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); } } - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { - setOperationAction(ISD::MLOAD, VT, Legal); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, + MVT::v8f32, MVT::v2f64, MVT::v4f64}) { + setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); } // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v4f32, MVT::v2f64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, + MVT::v2f64}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); } // Custom lower several nodes for 256-bit types. - for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, + MVT::v4f64}) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); } if (HasInt256) - setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); if (HasInt256) { // Custom legalize 2x32 to get a little better code. setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) - setOperationAction(ISD::MGATHER, VT, Custom); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) + setOperationAction(ISD::MGATHER, VT, Custom); } } @@ -1310,22 +1310,22 @@ // available with AVX512. 
512-bit vectors are in a separate block controlled // by useAVX512Regs. if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { - addRegisterClass(MVT::v1i1, &X86::VK1RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v8i1, &X86::VK8RegClass); - addRegisterClass(MVT::v16i1, &X86::VK16RegClass); + addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); + addRegisterClass(MVT::v8i1, &X86::VK8RegClass); + addRegisterClass(MVT::v16i1, &X86::VK16RegClass); - setOperationAction(ISD::SELECT, MVT::v1i1, Custom); + setOperationAction(ISD::SELECT, MVT::v1i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); - setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); - setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); - setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); - setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); // There is no byte sized k-register load or store without AVX512DQ. if (!Subtarget.hasDQI()) { @@ -1341,34 +1341,34 @@ } // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. 
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SIGN_EXTEND, VT, Custom); setOperationAction(ISD::ZERO_EXTEND, VT, Custom); - setOperationAction(ISD::ANY_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); } - for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); + for (auto VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1}) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); } - for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) + for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); } @@ -1378,74 +1378,74 @@ if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { addRegisterClass(MVT::v16i32, &X86::VR512RegClass); addRegisterClass(MVT::v16f32, &X86::VR512RegClass); - addRegisterClass(MVT::v8i64, &X86::VR512RegClass); - addRegisterClass(MVT::v8f64, &X86::VR512RegClass); + addRegisterClass(MVT::v8i64, &X86::VR512RegClass); + addRegisterClass(MVT::v8f64, &X86::VR512RegClass); for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { - setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); + setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); } - for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FMA, VT, Legal); + for (MVT VT : {MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FMA, VT, Legal); 
setOperationAction(ISD::FCOPYSIGN, VT, Custom); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); if (!Subtarget.hasVLX()) { // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE // to 512-bit rather than use the AVX2 instructions so that we can use // k-masks. for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { - setOperationAction(ISD::MLOAD, VT, Custom); + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { + setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::MSTORE, VT, Custom); } } - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); if (ExperimentalVectorWideningLegalization) { // Need to custom widen this if we don't have AVX512BW. 
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); } - for (auto VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); + for (auto VT : {MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); } // Without BWI we need to use custom lowering to handle MVT::v64i8 input. @@ -1454,37 +1454,37 @@ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); } - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - - setOperationAction(ISD::MUL, MVT::v8i64, Custom); - setOperationAction(ISD::MUL, MVT::v16i32, Legal); - - setOperationAction(ISD::MULHU, MVT::v16i32, Custom); - setOperationAction(ISD::MULHS, MVT::v16i32, Custom); - - setOperationAction(ISD::SELECT, MVT::v8f64, Custom); - setOperationAction(ISD::SELECT, MVT::v8i64, Custom); - setOperationAction(ISD::SELECT, MVT::v16i32, Custom); - setOperationAction(ISD::SELECT, MVT::v32i16, Custom); - setOperationAction(ISD::SELECT, MVT::v64i8, Custom); - setOperationAction(ISD::SELECT, MVT::v16f32, Custom); - - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); + + setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::MUL, MVT::v16i32, Legal); + + setOperationAction(ISD::MULHU, MVT::v16i32, Custom); + setOperationAction(ISD::MULHS, MVT::v16i32, Custom); + + setOperationAction(ISD::SELECT, MVT::v8f64, Custom); + setOperationAction(ISD::SELECT, MVT::v8i64, Custom); + setOperationAction(ISD::SELECT, MVT::v16i32, Custom); + setOperationAction(ISD::SELECT, MVT::v32i16, Custom); + setOperationAction(ISD::SELECT, MVT::v64i8, Custom); + setOperationAction(ISD::SELECT, MVT::v16f32, Custom); + + for (auto VT : {MVT::v16i32, MVT::v8i64}) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + 
setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1498,54 +1498,54 @@ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::MUL, MVT::v8i64, Legal); + setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - for (auto VT : { MVT::v16i32, MVT::v8i64} ) { - setOperationAction(ISD::CTLZ, VT, Legal); + for (auto VT : {MVT::v16i32, MVT::v8i64}) { + setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { - for (auto VT : { MVT::v16i32, MVT::v8i64 }) + for (auto VT : {MVT::v16i32, MVT::v8i64}) setOperationAction(ISD::CTPOP, VT, Legal); } // Extract subvector is special because the value type // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Legal under AVX1. - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, + MVT::v4f64}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); - for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); + for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); } // Need to custom split v32i16/v64i8 bitcasts. if (!Subtarget.hasBWI()) { setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); - setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); } if (Subtarget.hasVBMI2()) { - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + for (auto VT : {MVT::v16i32, MVT::v8i64}) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } } - }// has AVX-512 + } // has AVX-512 // This block controls legalization for operations that don't have // pre-AVX512 equivalents. Without VLX we use 512-bit operations for @@ -1555,52 +1555,52 @@ // isel patterns. 
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns? - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { + for (auto VT : {MVT::v2i64, MVT::v4i64}) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); } - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } // Custom legalize 2x32 to get a little better code. setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, + MVT::v8f32, MVT::v2f64, MVT::v4f64}) setOperationAction(ISD::MSCATTER, VT, Custom); if (Subtarget.hasDQI()) { - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, Legal); - setOperationAction(ISD::UINT_TO_FP, VT, Legal); - setOperationAction(ISD::FP_TO_SINT, VT, Legal); - setOperationAction(ISD::FP_TO_UINT, VT, Legal); + for (auto VT : {MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::SINT_TO_FP, VT, Legal); + setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, Legal); + setOperationAction(ISD::FP_TO_UINT, VT, Legal); - setOperationAction(ISD::MUL, VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); } } if (Subtarget.hasCDI()) { - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::CTLZ, VT, Legal); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) setOperationAction(ISD::CTPOP, VT, Legal); } } @@ -1609,39 +1609,39 @@ // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with // useBWIRegs. 
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { - addRegisterClass(MVT::v32i1, &X86::VK32RegClass); - addRegisterClass(MVT::v64i1, &X86::VK64RegClass); + addRegisterClass(MVT::v32i1, &X86::VK32RegClass); + addRegisterClass(MVT::v64i1, &X86::VK64RegClass); - for (auto VT : { MVT::v32i1, MVT::v64i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); + for (auto VT : {MVT::v32i1, MVT::v64i1}) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); - for (auto VT : { MVT::v16i1, MVT::v32i1 }) + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + for (auto VT : {MVT::v16i1, MVT::v32i1}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Extends from v32i1 masks to 256-bit vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); } // This block controls legalization for v32i16 and v64i8. 512-bits can be @@ -1649,62 +1649,62 @@ // attributes. if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); - addRegisterClass(MVT::v64i8, &X86::VR512RegClass); + addRegisterClass(MVT::v64i8, &X86::VR512RegClass); // Extends from v64i1 masks to 512-bit vectors. 
- setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); - - setOperationAction(ISD::MUL, MVT::v32i16, Legal); - setOperationAction(ISD::MUL, MVT::v64i8, Custom); - setOperationAction(ISD::MULHS, MVT::v32i16, Legal); - setOperationAction(ISD::MULHU, MVT::v32i16, Legal); - setOperationAction(ISD::MULHS, MVT::v64i8, Custom); - setOperationAction(ISD::MULHU, MVT::v64i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); + setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); + + setOperationAction(ISD::MUL, MVT::v32i16, Legal); + setOperationAction(ISD::MUL, MVT::v64i8, Custom); + setOperationAction(ISD::MULHS, MVT::v32i16, Legal); + setOperationAction(ISD::MULHU, MVT::v32i16, Legal); + setOperationAction(ISD::MULHS, MVT::v64i8, Custom); + setOperationAction(ISD::MULHU, MVT::v64i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); - setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); - setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); + setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { + for (auto VT : {MVT::v64i8, MVT::v32i16}) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, 
Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::UADDSAT, VT, Legal); - setOperationAction(ISD::SADDSAT, VT, Legal); - setOperationAction(ISD::USUBSAT, VT, Legal); - setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1717,7 +1717,7 @@ } if (Subtarget.hasBITALG()) { - for (auto VT : { MVT::v64i8, MVT::v32i16 }) + for (auto VT : {MVT::v64i8, MVT::v32i16}) setOperationAction(ISD::CTPOP, VT, Legal); } @@ -1728,8 +1728,8 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { - for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { - setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); + for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16}) { + setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); } @@ -1738,44 +1738,44 @@ // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? if (Subtarget.hasBITALG()) { - for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) + for (auto VT : {MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16}) setOperationAction(ISD::CTPOP, VT, Legal); } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); if (Subtarget.hasDQI()) { // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. // v2f32 UINT_TO_FP is already custom under SSE2. 
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"); // v2i64 FP_TO_S/UINT(v2f32) custom conversion. - setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); } if (Subtarget.hasBWI()) { - setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); } if (Subtarget.hasVBMI2()) { // TODO: Make these legal even without VLX? - for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, + MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } @@ -1796,7 +1796,7 @@ // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; // Add/Sub/Mul with overflow operations are custom lowered. @@ -1942,8 +1942,7 @@ return TypeSplitVector; if (ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() != 1 && - VT.getVectorElementType() != MVT::i1) + VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); @@ -1966,8 +1965,7 @@ } EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, - LLVMContext& Context, - EVT VT) const { + LLVMContext &Context, EVT VT) const { if (!VT.isVector()) return MVT::i8; @@ -2106,11 +2104,10 @@ return true; } -bool -X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, - bool *Fast) const { +bool X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, + unsigned, + MachineMemOperand::Flags, + bool *Fast) const { if (Fast) { switch (VT.getSizeInBits()) { default: @@ -2123,7 +2120,7 @@ case 256: *Fast = !Subtarget.isUnalignedMem32Slow(); break; - // TODO: What about AVX-512 (512-bit) accesses? + // TODO: What about AVX-512 (512-bit) accesses? } } // Misaligned accesses of any size are always allowed. @@ -2175,15 +2172,14 @@ } } -const MCExpr * -X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid,MCContext &Ctx) const{ +const MCExpr *X86TargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned uid, MCContext &Ctx) const { assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. - return MCSymbolRefExpr::create(MBB->getSymbol(), - MCSymbolRefExpr::VK_GOTOFF, Ctx); + return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, + Ctx); } /// Returns relocation base for the given PIC jumptable. 
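Note for context on the X86TargetLowering::allowsMisalignedMemoryAccesses hunk in this file: the override now also receives the MachineMemOperand flags of the access being queried, even though X86 currently ignores them. A minimal sketch of how an out-of-tree target might use the extra parameter is shown below; the target name, the volatile policy, and the size threshold are illustrative assumptions, not taken from this patch:

bool MyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align,
    MachineMemOperand::Flags Flags, bool *Fast) const {
  // Hypothetical policy: reject misaligned volatile accesses, allow the rest.
  if (Flags & MachineMemOperand::MOVolatile)
    return false;
  if (Fast)
    *Fast = VT.getStoreSize() <= 16; // assumed cost model, not X86's
  return true;
}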
@@ -2199,9 +2195,8 @@ /// This returns the relocation base for the given PIC jumptable, /// the same as getPICJumpTableRelocBase, but as an MCExpr. -const MCExpr *X86TargetLowering:: -getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, - MCContext &Ctx) const { +const MCExpr *X86TargetLowering::getPICJumpTableRelocBaseExpr( + const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { // X86-64 uses RIP relative addressing based on the jump table label. if (Subtarget.isPICStyleRIPRel()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); @@ -2218,19 +2213,35 @@ switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); - case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; break; case MVT::x86mmx: RRC = &X86::VR64RegClass; break; - case MVT::f32: case MVT::f64: - case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: - case MVT::v4f32: case MVT::v2f64: - case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: - case MVT::v8f32: case MVT::v4f64: - case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - case MVT::v16f32: case MVT::v8f64: + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + case MVT::v32i8: + case MVT::v16i16: + case MVT::v8i32: + case MVT::v4i64: + case MVT::v8f32: + case MVT::v4f64: + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + case MVT::v16f32: + case MVT::v8f64: RRC = &X86::VR128XRegClass; break; } @@ -2248,8 +2259,8 @@ (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); } -static Constant* SegmentOffset(IRBuilder<> &IRB, - unsigned Offset, unsigned AddressSpace) { +static Constant *SegmentOffset(IRBuilder<> &IRB, unsigned Offset, + unsigned AddressSpace) { return ConstantExpr::getIntToPtr( ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); @@ -2359,7 +2370,7 @@ } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { - static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; + static const MCPhysReg ScratchRegs[] = {X86::R11, 0}; return ScratchRegs; } @@ -2447,8 +2458,8 @@ SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, - MVT::i32)); + RetOps.push_back( + DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, MVT::i32)); // Copy the result values into the output registers. for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; @@ -2473,8 +2484,7 @@ ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); - } - else if (VA.getLocInfo() == CCValAssign::BCvt) + } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && @@ -2498,8 +2508,7 @@ // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. 
- if (VA.getLocReg() == X86::FP0 || - VA.getLocReg() == X86::FP1) { + if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) { // If this is a copy from an xmm register to ST(0), use an FPExtend to // change the value to the FP stack register class. if (isScalarFPTypeInSSEReg(VA.getValVT())) @@ -2515,8 +2524,8 @@ if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); - ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, - ValToCopy); + ValToCopy = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget.hasSSE2()) @@ -2587,9 +2596,9 @@ SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, getPointerTy(MF.getDataLayout())); - unsigned RetValReg - = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? - X86::RAX : X86::EAX; + unsigned RetValReg = + (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? X86::RAX + : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); @@ -2614,7 +2623,7 @@ } } - RetOps[0] = Chain; // Update chain. + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) @@ -2635,7 +2644,8 @@ if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. - if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == + MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) @@ -2651,7 +2661,7 @@ if (UI->getNumOperands() > 4) return false; if (UI->getNumOperands() == 4 && - UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) + UI->getOperand(UI->getNumOperands() - 1).getValueType() != MVT::Glue) return false; HasRet = true; } @@ -2721,10 +2731,10 @@ // When a physical register is available read the value from it and glue // the reads together. ArgValueLo = - DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); + DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueLo.getValue(2); ArgValueHi = - DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); + DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueHi.getValue(2); } @@ -2869,13 +2879,9 @@ /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -enum StructReturnType { - NotStructReturn, - RegStructReturn, - StackStructReturn -}; -static StructReturnType -callIsStructReturn(ArrayRef Outs, bool IsMCU) { +enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn }; +static StructReturnType callIsStructReturn(ArrayRef Outs, + bool IsMCU) { if (Outs.empty()) return NotStructReturn; @@ -2888,8 +2894,8 @@ } /// Determines whether a function uses struct return semantics. 
-static StructReturnType -argsAreStructReturn(ArrayRef Ins, bool IsMCU) { +static StructReturnType argsAreStructReturn(ArrayRef Ins, + bool IsMCU) { if (Ins.empty()) return NotStructReturn; @@ -2910,9 +2916,9 @@ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*isVolatile*/false, /*AlwaysInline=*/true, - /*isTailCall*/false, - MachinePointerInfo(), MachinePointerInfo()); + /*isVolatile*/ false, /*AlwaysInline=*/true, + /*isTailCall*/ false, MachinePointerInfo(), + MachinePointerInfo()); } /// Return true if the calling convention is one that we can guarantee TCO for. @@ -2994,7 +3000,8 @@ // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { unsigned Bytes = Flags.getByValSize(); - if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + if (Bytes == 0) + Bytes = 1; // Don't create zero-sized stack objects. // FIXME: For now, all byval parameter objects are marked as aliasing. This // can be improved with deeper analysis. @@ -3075,16 +3082,16 @@ assert(Subtarget.is64Bit()); if (Subtarget.isCallingConvWin64(CallConv)) { - static const MCPhysReg GPR64ArgRegsWin64[] = { - X86::RCX, X86::RDX, X86::R8, X86::R9 - }; - return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); + static const MCPhysReg GPR64ArgRegsWin64[] = {X86::RCX, X86::RDX, X86::R8, + X86::R9}; + return makeArrayRef(std::begin(GPR64ArgRegsWin64), + std::end(GPR64ArgRegsWin64)); } - static const MCPhysReg GPR64ArgRegs64Bit[] = { - X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 - }; - return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); + static const MCPhysReg GPR64ArgRegs64Bit[] = {X86::RDI, X86::RSI, X86::RDX, + X86::RCX, X86::R8, X86::R9}; + return makeArrayRef(std::begin(GPR64ArgRegs64Bit), + std::end(GPR64ArgRegs64Bit)); } // FIXME: Get this from tablegen. @@ -3110,10 +3117,9 @@ // registers. return None; - static const MCPhysReg XMMArgRegs64Bit[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; + static const MCPhysReg XMMArgRegs64Bit[] = {X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}; return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } @@ -3327,8 +3333,7 @@ SDValue ALVal; for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); - LiveGPRs.push_back( - DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); + LiveGPRs.push_back(DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); } if (!ArgXMMs.empty()) { unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); @@ -3381,10 +3386,10 @@ SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getRegSaveFrameIndex(), dl)); - SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getVarArgsFPOffset(), dl)); + SaveXMMOps.push_back( + DAG.getIntPtrConstant(FuncInfo->getRegSaveFrameIndex(), dl)); + SaveXMMOps.push_back( + DAG.getIntPtrConstant(FuncInfo->getVarArgsFPOffset(), dl)); SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), LiveXMMRegs.end()); MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, @@ -3528,11 +3533,11 @@ EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl) { // Store the return address to the appropriate stack slot. 
-  if (!FPDiff) return Chain;
+  if (!FPDiff)
+    return Chain;
   // Calculate the new stack slot for the return address.
-  int NewReturnAddrFI =
-    MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
-                                        false);
+  int NewReturnAddrFI = MF.getFrameInfo().CreateFixedObject(
+      SlotSize, (int64_t)FPDiff - SlotSize, false);
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                        MachinePointerInfo::getFixedStack(
@@ -3552,25 +3557,24 @@
   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
 }
-SDValue
-X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                             SmallVectorImpl<SDValue> &InVals) const {
-  SelectionDAG &DAG = CLI.DAG;
-  SDLoc &dl = CLI.DL;
+SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                     SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &dl = CLI.DL;
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
-  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
-  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
-  SDValue Chain = CLI.Chain;
-  SDValue Callee = CLI.Callee;
-  CallingConv::ID CallConv = CLI.CallConv;
-  bool &isTailCall = CLI.IsTailCall;
-  bool isVarArg = CLI.IsVarArg;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool &isTailCall = CLI.IsTailCall;
+  bool isVarArg = CLI.IsVarArg;
   MachineFunction &MF = DAG.getMachineFunction();
-  bool Is64Bit = Subtarget.is64Bit();
-  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+  bool Is64Bit = Subtarget.is64Bit();
+  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
   StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
-  bool IsSibcall = false;
+  bool IsSibcall = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
   const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
@@ -3610,10 +3614,10 @@
     isTailCall = true;
   } else if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, SR != NotStructReturn,
-                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
-                    Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, SR != NotStructReturn,
+        MF.getFunction().hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins,
+        DAG);
     // Sibcalls are automatically detected tailcalls which do not require
     // ABI changes.
@@ -3719,8 +3723,10 @@
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full: break;
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
     case CCValAssign::SExt:
       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
       break;
@@ -3783,10 +3789,18 @@
         // shadow reg if callee is a varargs function.
unsigned ShadowReg = 0; switch (VA.getLocReg()) { - case X86::XMM0: ShadowReg = X86::RCX; break; - case X86::XMM1: ShadowReg = X86::RDX; break; - case X86::XMM2: ShadowReg = X86::R8; break; - case X86::XMM3: ShadowReg = X86::R9; break; + case X86::XMM0: + ShadowReg = X86::RCX; + break; + case X86::XMM1: + ShadowReg = X86::RDX; + break; + case X86::XMM2: + ShadowReg = X86::R8; + break; + case X86::XMM3: + ShadowReg = X86::R9; + break; } if (ShadowReg) RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); @@ -3796,8 +3810,8 @@ if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, - dl, DAG, VA, Flags)); + MemOpChains.push_back( + LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } @@ -3840,17 +3854,15 @@ // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. - static const MCPhysReg XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; + static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); - assert((Subtarget.hasSSE1() || !NumXMMRegs) - && "SSE registers cannot be used when SSE is disabled"); + assert((Subtarget.hasSSE1() || !NumXMMRegs) && + "SSE registers cannot be used when SSE is disabled"); - RegsToPass.push_back(std::make_pair(unsigned(X86::AL), - DAG.getConstant(NumXMMRegs, dl, - MVT::i8))); + RegsToPass.push_back(std::make_pair( + unsigned(X86::AL), DAG.getConstant(NumXMMRegs, dl, MVT::i8))); } if (isVarArg && IsMustTail) { @@ -3899,8 +3911,8 @@ if (Flags.isInAlloca()) continue; // Create frame index. - int32_t Offset = VA.getLocMemOffset()+FPDiff; - uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + uint32_t OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8; FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); @@ -3913,9 +3925,8 @@ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, Source); - MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, - ArgChain, - Flags, DAG, dl)); + MemOpChains2.push_back( + CreateCopyOfByValArgument(Source, FIN, ArgChain, Flags, DAG, dl)); } else { // Store relative to framepointer. MemOpChains2.push_back(DAG.getStore( @@ -4059,17 +4070,16 @@ unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) - NumBytesForCalleeToPop = NumBytes; // Callee pops everything + NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !canGuaranteeTCO(CallConv) && - !Subtarget.getTargetTriple().isOSMSVCRT() && - SR == StackStructReturn) + !Subtarget.getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. // For MSVC Win32 targets, the caller pops the hidden struct pointer. NumBytesForCalleeToPop = 4; else - NumBytesForCalleeToPop = 0; // Callee pops nothing. + NumBytesForCalleeToPop = 0; // Callee pops nothing. 
if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) { // No need to reset the stack after the call if the call doesn't return. To @@ -4079,11 +4089,9 @@ // Returns a flag for retval copy to use. if (!IsSibcall) { - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytesToPop, dl, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, - true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END( + Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -4128,30 +4136,31 @@ /// requirement. unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; unsigned SlotSize = RegInfo->getSlotSize(); - if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { + if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); } else { // Mask out lower bits, add stackalignment once plus the 12 bytes. - Offset = ((~AlignMask) & Offset) + StackAlignment + - (StackAlignment-SlotSize); + Offset = + ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize); } return Offset; } /// Return true if the given stack call argument is already available in the /// same position (relatively) of the caller's incoming argument stack. -static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const X86InstrInfo *TII, const CCValAssign &VA) { +static bool MatchingStackOffset(SDValue Arg, unsigned Offset, + ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, + const MachineRegisterInfo *MRI, + const X86InstrInfo *TII, + const CCValAssign &VA) { unsigned Bytes = Arg.getValueSizeInBits() / 8; for (;;) { @@ -4332,8 +4341,8 @@ } // Check that the call results are passed in the same way. - if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, - RetCC_X86, RetCC_X86)) + if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86, + RetCC_X86)) return false; // The callee has to preserve all registers the caller needs to preserve. const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -4374,8 +4383,8 @@ if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII, VA)) + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, + TII, VA)) return false; } } @@ -4401,8 +4410,11 @@ continue; unsigned Reg = VA.getLocReg(); switch (Reg) { - default: break; - case X86::EAX: case X86::EDX: case X86::ECX: + default: + break; + case X86::EAX: + case X86::EDX: + case X86::ECX: if (++NumInRegs == MaxInRegs) return false; break; @@ -4460,8 +4472,9 @@ } static bool isTargetShuffle(unsigned Opcode) { - switch(Opcode) { - default: return false; + switch (Opcode) { + default: + return false; case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -4500,7 +4513,8 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { switch (Opcode) { - default: return false; + default: + return false; // Target Shuffles. 
case X86ISD::PSHUFB: case X86ISD::VPERMILPV: @@ -4526,9 +4540,8 @@ if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); - ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, - -(int64_t)SlotSize, - false); + ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject( + SlotSize, -(int64_t)SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -4553,7 +4566,7 @@ // For small code model we assume that latest object is 16MB before end of 31 // bits boundary. We may also accept pretty large negative constants knowing // that all objects are in the positive half of address space. - if (M == CodeModel::Small && Offset < 16*1024*1024) + if (M == CodeModel::Small && Offset < 16 * 1024 * 1024) return true; // For kernel code model we know that all object resist in the negative half @@ -4567,8 +4580,8 @@ /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. -bool X86::isCalleePop(CallingConv::ID CallingConv, - bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { +bool X86::isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, + bool GuaranteeTCO) { // If GuaranteeTCO is true, we force some calls to be callee pop so that we // can guarantee TCO. if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) @@ -4607,17 +4620,28 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; + default: + llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: + return X86::COND_E; + case ISD::SETGT: + return X86::COND_G; + case ISD::SETGE: + return X86::COND_GE; + case ISD::SETLT: + return X86::COND_L; + case ISD::SETLE: + return X86::COND_LE; + case ISD::SETNE: + return X86::COND_NE; + case ISD::SETULT: + return X86::COND_B; + case ISD::SETUGT: + return X86::COND_A; + case ISD::SETULE: + return X86::COND_BE; + case ISD::SETUGE: + return X86::COND_AE; } } @@ -4625,8 +4649,8 @@ /// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, - bool isFP, SDValue &LHS, SDValue &RHS, - SelectionDAG &DAG) { + bool isFP, SDValue &LHS, SDValue &RHS, + SelectionDAG &DAG) { if (!isFP) { if (ConstantSDNode *RHSC = dyn_cast(RHS)) { if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { @@ -4651,14 +4675,14 @@ // First determine if it is required or is profitable to flip the operands. // If LHS is a foldable load, but RHS is not, flip the condition. 
- if (ISD::isNON_EXTLoad(LHS.getNode()) && - !ISD::isNON_EXTLoad(RHS.getNode())) { + if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) { SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); std::swap(LHS, RHS); } switch (SetCCOpcode) { - default: break; + default: + break; case ISD::SETOLT: case ISD::SETOLE: case ISD::SETUGT: @@ -4674,27 +4698,37 @@ // 1 | 0 | 0 | X == Y // 1 | 1 | 1 | unordered switch (SetCCOpcode) { - default: llvm_unreachable("Condcode should be pre-legalized away"); + default: + llvm_unreachable("Condcode should be pre-legalized away"); case ISD::SETUEQ: - case ISD::SETEQ: return X86::COND_E; - case ISD::SETOLT: // flipped + case ISD::SETEQ: + return X86::COND_E; + case ISD::SETOLT: // flipped case ISD::SETOGT: - case ISD::SETGT: return X86::COND_A; - case ISD::SETOLE: // flipped + case ISD::SETGT: + return X86::COND_A; + case ISD::SETOLE: // flipped case ISD::SETOGE: - case ISD::SETGE: return X86::COND_AE; - case ISD::SETUGT: // flipped + case ISD::SETGE: + return X86::COND_AE; + case ISD::SETUGT: // flipped case ISD::SETULT: - case ISD::SETLT: return X86::COND_B; - case ISD::SETUGE: // flipped + case ISD::SETLT: + return X86::COND_B; + case ISD::SETUGE: // flipped case ISD::SETULE: - case ISD::SETLE: return X86::COND_BE; + case ISD::SETLE: + return X86::COND_BE; case ISD::SETONE: - case ISD::SETNE: return X86::COND_NE; - case ISD::SETUO: return X86::COND_P; - case ISD::SETO: return X86::COND_NP; + case ISD::SETNE: + return X86::COND_NE; + case ISD::SETUO: + return X86::COND_P; + case ISD::SETO: + return X86::COND_NP; case ISD::SETOEQ: - case ISD::SETUNE: return X86::COND_INVALID; + case ISD::SETUNE: + return X86::COND_INVALID; } } @@ -4717,13 +4751,12 @@ } } - bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { - const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); + const IntrinsicData *IntrData = getIntrinsicWithChain(Intrinsic); if (!IntrData) return false; @@ -4736,7 +4769,7 @@ case TRUNCATE_TO_MEM_VI32: { Info.opc = ISD::INTRINSIC_VOID; Info.ptrVal = I.getArgOperand(0); - MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); + MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE; if (IntrData->Type == TRUNCATE_TO_MEM_VI8) ScalarVT = MVT::i8; @@ -4756,8 +4789,8 @@ Info.ptrVal = nullptr; MVT DataVT = MVT::getVT(I.getType()); MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); - unsigned NumElts = std::min(DataVT.getVectorNumElements(), - IndexVT.getVectorNumElements()); + unsigned NumElts = + std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); Info.align = 1; Info.flags |= MachineMemOperand::MOLoad; @@ -4768,8 +4801,8 @@ Info.ptrVal = nullptr; MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType()); MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); - unsigned NumElts = std::min(DataVT.getVectorNumElements(), - IndexVT.getVectorNumElements()); + unsigned NumElts = + std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); Info.align = 1; Info.flags |= MachineMemOperand::MOStore; @@ -4868,8 +4901,9 @@ // Mask vectors support all subregister combinations and operations that // extract half of vector. 
   if (ResVT.getVectorElementType() == MVT::i1)
-    return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
-          (Index == ResVT.getVectorNumElements()));
+    return Index == 0 ||
+           ((ResVT.getSizeInBits() == SrcVT.getSizeInBits() * 2) &&
+            (Index == ResVT.getVectorNumElements()));
   return (Index % ResVT.getVectorNumElements()) == 0;
 }
@@ -4933,9 +4967,7 @@
   return true;
 }
-bool X86TargetLowering::isCtlzFast() const {
-  return Subtarget.hasFastLZCNT();
-}
+bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); }
 bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
@@ -5086,8 +5118,7 @@
 /// Return true if every element in Mask is undef or if its value
 /// falls within the specified range (L, H].
-static bool isUndefOrInRange(ArrayRef<int> Mask,
-                             int Low, int Hi) {
+static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
   for (int M : Mask)
     if (!isUndefOrInRange(M, Low, Hi))
       return false;
@@ -5200,8 +5231,7 @@
   return true;
 }
-static bool canWidenShuffleElements(ArrayRef<int> Mask,
-                                    const APInt &Zeroable,
+static bool canWidenShuffleElements(ArrayRef<int> Mask, const APInt &Zeroable,
                                     SmallVectorImpl<int> &WidenedMask) {
   SmallVector<int, 64> TargetMask(Mask.begin(), Mask.end());
   for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
@@ -5229,7 +5259,7 @@
 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
                               const SDLoc &dl, bool IsMask = false) {
-  SmallVector<SDValue, 32> Ops;
+  SmallVector<SDValue, 32> Ops;
   bool Split = false;
   MVT ConstVecVT = VT;
@@ -5243,12 +5273,12 @@
   MVT EltVT = ConstVecVT.getVectorElementType();
   for (unsigned i = 0; i < NumElts; ++i) {
     bool IsUndef = Values[i] < 0 && IsMask;
-    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
-      DAG.getConstant(Values[i], dl, EltVT);
+    SDValue OpNode =
+        IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(Values[i], dl, EltVT);
     Ops.push_back(OpNode);
     if (Split)
-      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
-                    DAG.getConstant(0, dl, EltVT));
+      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT)
+                            : DAG.getConstant(0, dl, EltVT));
   }
   SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
   if (Split)
@@ -5256,8 +5286,8 @@
   return ConstsNode;
 }
-static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
-                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs, MVT VT,
+                              SelectionDAG &DAG, const SDLoc &dl) {
   assert(Bits.size() == Undefs.getBitWidth() &&
          "Unequal constant and undef arrays");
   SmallVector<SDValue, 32> Ops;
@@ -5325,9 +5355,9 @@
                                    const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
   EVT ElVT = VT.getVectorElementType();
-  unsigned Factor = VT.getSizeInBits()/vectorWidth;
+  unsigned Factor = VT.getSizeInBits() / vectorWidth;
   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
-                                  VT.getVectorNumElements()/Factor);
+                                  VT.getVectorNumElements() / Factor);
   // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
@@ -5355,7 +5385,7 @@
 static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
                                    SelectionDAG &DAG, const SDLoc &dl) {
   assert((Vec.getValueType().is256BitVector() ||
-          Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+          Vec.getValueType().is512BitVector()) &&
+         "Unexpected vector size!");
   return extractSubVector(Vec, IdxVal, DAG, dl, 128);
 }
@@ -5379,7 +5410,7 @@
   EVT ResultVT = Result.getValueType();
   // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk @@ -5545,8 +5576,7 @@ if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { // May need to promote to a legal type. Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - SubVec, Idx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), SubVec, Idx); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } @@ -5562,20 +5592,19 @@ if (IdxVal == 0) { // Zero lower bits of the Vec SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, - ZeroIdx); + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); // Merge them together, SubVec should be zero extended. SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - SubVec, ZeroIdx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), SubVec, + ZeroIdx); Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } - SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - Undef, SubVec, ZeroIdx); + SubVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, SubVec, ZeroIdx); if (Vec.isUndef()) { assert(IdxVal != 0 && "Unexpected index"); @@ -5606,12 +5635,12 @@ // isel to opimitize when bits are known zero. Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - Vec, ZeroIdx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), Vec, + ZeroIdx); } else { // Otherwise use explicit shifts to zero the bits. - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - Undef, Vec, ZeroIdx); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); NumElems = WideOpVT.getVectorNumElements(); SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); @@ -5639,7 +5668,7 @@ // Shift to the final position, filling upper bits with 0. unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getConstant(ShiftRight, dl, MVT::i8)); // Xor with original vector leaving the new value. Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); // Reduce to original width if needed. @@ -5683,8 +5712,8 @@ } if (VT.getVectorNumElements() == InVT.getVectorNumElements()) - return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, VT, In); + return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, + In); return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND_VECTOR_INREG, @@ -5711,13 +5740,12 @@ /// This produces a shuffle where the low element of V2 is swizzled into the /// zero/undef vector, landing at element Idx. /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 
-static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, - bool IsZero, +static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = V2.getSimpleValueType(); - SDValue V1 = IsZero - ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); + SDValue V1 = + IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); int NumElems = VT.getVectorNumElements(); SmallVector MaskVec(NumElems); for (int i = 0; i != NumElems; ++i) @@ -6434,7 +6462,8 @@ } return false; } - default: llvm_unreachable("unknown target shuffle node"); + default: + llvm_unreachable("unknown target shuffle node"); } // Empty mask indicates the decode failed. @@ -6469,8 +6498,7 @@ /// SM_SentinelZero - this is for elements that are known to be zero /// (not just zeroable) from their inputs. /// Returns true if the target shuffle mask was decoded. -static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask, +static bool setTargetShuffleZeroElements(SDValue N, SmallVectorImpl &Mask, SmallVectorImpl &Ops) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) @@ -6656,10 +6684,11 @@ int InsertIdx = N.getConstantOperandVal(2); if (SubMask.size() != NumSubElts) { assert(((SubMask.size() % NumSubElts) == 0 || - (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale"); + (NumSubElts % SubMask.size()) == 0) && + "Illegal submask scale"); if ((NumSubElts % SubMask.size()) == 0) { int Scale = NumSubElts / SubMask.size(); - SmallVector ScaledSubMask; + SmallVector ScaledSubMask; scaleShuffleMask(Scale, SubMask, ScaledSubMask); SubMask = ScaledSubMask; } else { @@ -6877,7 +6906,8 @@ return false; } -/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly. +/// Removes unused shuffle source inputs and adjusts the shuffle mask +/// accordingly. static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, SmallVectorImpl &Mask) { int MaskWidth = Mask.size(); @@ -6926,7 +6956,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) - return SDValue(); // Limit search depth. + return SDValue(); // Limit search depth. SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); @@ -6940,9 +6970,9 @@ return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); - SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) - : SV->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); + SDValue NewV = + (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth + 1); } // Recurse into target specific vector shuffles to find scalars. @@ -6954,7 +6984,8 @@ SmallVector ShuffleOps; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, + IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; @@ -6964,10 +6995,9 @@ if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufSVT); - assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); + assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range"); SDValue NewV = (Elt < NumElems) ? 
ShuffleOps[0] : ShuffleOps[1]; - return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, - Depth+1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth + 1); } // Recurse into insert_subvector base/sub vector to find scalars. @@ -7148,8 +7178,8 @@ MVT EltVT = VT.getVectorElementType(); // Create a new build vector with the first 2 elements followed by undef // padding, bitcast to v2f64, duplicate, and bitcast back. - SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), - DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), + DAG.getUNDEF(EltVT)}; SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); return DAG.getBitcast(VT, Dup); @@ -7197,7 +7227,7 @@ for (EltIdx = 0; EltIdx < 4; ++EltIdx) { if (Zeroable[EltIdx]) { // The zero vector will be on the right hand side. - Mask[EltIdx] = EltIdx+4; + Mask[EltIdx] = EltIdx + 4; continue; } @@ -7268,7 +7298,7 @@ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); + SDValue ShiftVal = DAG.getConstant(NumBits / 8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -7302,7 +7332,7 @@ // FIXME: 256-bit vector instructions don't require a strict alignment, // improve this code to support it better. - unsigned RequiredAlign = VT.getSizeInBits()/8; + unsigned RequiredAlign = VT.getSizeInBits() / 8; SDValue Chain = LD->getChain(); // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -7469,7 +7499,8 @@ // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. - if (!isAfterLegalize && VT.isVector() && NumElems == VT.getVectorNumElements()) { + if (!isAfterLegalize && VT.isVector() && + NumElems == VT.getVectorNumElements()) { SmallVector ClearMask(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { if (ZeroMask[i]) @@ -7511,12 +7542,10 @@ MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize); if (TLI.isTypeLegal(VecVT)) { SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); - SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, - LDBase->getPointerInfo(), - LDBase->getAlignment(), - MachineMemOperand::MOLoad); + SDValue Ops[] = {LDBase->getChain(), LDBase->getBasePtr()}; + SDValue ResNode = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), + LDBase->getAlignment(), MachineMemOperand::MOLoad); for (auto *LD : Loads) DAG.makeEquivalentMemoryOrdering(LD, ResNode); return DAG.getBitcast(VT, ResNode); @@ -7719,7 +7748,7 @@ else BOperand = Ld.getOperand(0).getOperand(0); MVT MaskVT = BOperand.getSimpleValueType(); - if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q + if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d SDValue Brdcst = DAG.getNode(X86ISD::VBROADCASTM, dl, @@ -7793,8 +7822,8 @@ } else if (SplatBitSize > 64) { // Load the vector of constants and broadcast it. 
MVT CVT = VT.getScalarType(); - Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, - *Ctx); + Constant *VecC = + getConstantVector(VT, SplatValue, SplatBitSize, *Ctx); SDValue VCP = DAG.getConstantPool(VecC, PVT); unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); unsigned Alignment = cast(VCP)->getAlignment(); @@ -8055,7 +8084,7 @@ return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl)); } // Vector has one or more non-const elements @@ -8092,8 +8121,7 @@ if (Immediate) { MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); Imm = DAG.getConstant(Immediate, dl, ImmVT); - } - else if (HasConstElts) + } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); else Imm = DAG.getUNDEF(VT); @@ -8135,9 +8163,8 @@ /// See the corrected implementation in isHopBuildVector(). Can we reduce this /// code because it is only used for partial h-op matching now? static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode, - SelectionDAG &DAG, - unsigned BaseIdx, unsigned LastIdx, - SDValue &V0, SDValue &V1) { + SelectionDAG &DAG, unsigned BaseIdx, + unsigned LastIdx, SDValue &V0, SDValue &V1) { EVT VT = N->getValueType(0); assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops"); assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); @@ -8175,10 +8202,10 @@ // Try to match the following pattern: // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1)) CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Op0.getOperand(0) == Op1.getOperand(0) && - isa(Op0.getOperand(1)) && - isa(Op1.getOperand(1))); + Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0) == Op1.getOperand(0) && + isa(Op0.getOperand(1)) && + isa(Op1.getOperand(1))); if (!CanFold) break; @@ -8258,9 +8285,9 @@ unsigned NumElts = VT.getVectorNumElements(); SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL); - SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL); + SDValue V0_HI = extract128BitVector(V0, NumElts / 2, DAG, DL); SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL); - SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL); + SDValue V1_HI = extract128BitVector(V1, NumElts / 2, DAG, DL); MVT NewVT = V0_LO.getSimpleValueType(); SDValue LO = DAG.getUNDEF(NewVT); @@ -8292,8 +8319,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, - unsigned &NumExtracts, - bool &IsSubAdd) { + unsigned &NumExtracts, bool &IsSubAdd) { MVT VT = BV->getSimpleValueType(0); if (!Subtarget.hasSSE3() || !VT.isFloatingPoint()) @@ -8380,8 +8406,8 @@ // Ensure we have found an opcode for both parities and that they are // different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the // inputs are undef. - if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || - InVec0.isUndef() || InVec1.isUndef()) + if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || InVec0.isUndef() || + InVec1.isUndef()) return false; IsSubAdd = Opc[0] == ISD::FADD; @@ -8394,7 +8420,8 @@ /// Returns true if is possible to fold MUL and an idiom that has already been /// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into /// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the -/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2. 
+/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, +/// \p Opnd2. /// /// Prior to calling this function it should be known that there is some /// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation @@ -8418,8 +8445,8 @@ /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit /// FMADDSUB is. static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, - SelectionDAG &DAG, - SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, + SelectionDAG &DAG, SDValue &Opnd0, + SDValue &Opnd1, SDValue &Opnd2, unsigned ExpectedUses) { if (Opnd0.getOpcode() != ISD::FMUL || !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) @@ -8511,11 +8538,20 @@ if (HOpcode == ISD::DELETED_NODE) { GenericOpcode = Op.getOpcode(); switch (GenericOpcode) { - case ISD::ADD: HOpcode = X86ISD::HADD; break; - case ISD::SUB: HOpcode = X86ISD::HSUB; break; - case ISD::FADD: HOpcode = X86ISD::FHADD; break; - case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; - default: return false; + case ISD::ADD: + HOpcode = X86ISD::HADD; + break; + case ISD::SUB: + HOpcode = X86ISD::HSUB; + break; + case ISD::FADD: + HOpcode = X86ISD::FHADD; + break; + case ISD::FSUB: + HOpcode = X86ISD::FHSUB; + break; + default: + return false; } } @@ -8545,8 +8581,7 @@ // op (extract_vector_elt A, I), (extract_vector_elt A, I+1) unsigned ExtIndex0 = Op0.getConstantOperandVal(1); unsigned ExtIndex1 = Op1.getConstantOperandVal(1); - unsigned ExpectedIndex = i * NumEltsIn128Bits + - (j % NumEltsIn64Bits) * 2; + unsigned ExpectedIndex = i * NumEltsIn128Bits + (j % NumEltsIn64Bits) * 2; if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1) continue; @@ -9132,8 +9167,8 @@ return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); } -SDValue -X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -9159,7 +9194,7 @@ unsigned EVTBits = EltVT.getSizeInBits(); - unsigned NumZero = 0; + unsigned NumZero = 0; unsigned NumNonZero = 0; uint64_t NonZeros = 0; bool IsAllConstants = true; @@ -9275,7 +9310,7 @@ if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.getSizeInBits() >= 256) { - MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); + MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); if (Subtarget.hasAVX()) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -9296,14 +9331,13 @@ } // Is it a vector logical left shift? - if (NumElems == 2 && Idx == 1 && - X86::isZeroNode(Op.getOperand(0)) && + if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) && !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); - return getVShift(true, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - VT, Op.getOperand(1)), - NumBits/2, DAG, *this, dl); + return getVShift( + true, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)), + NumBits / 2, DAG, *this, dl); } if (IsAllConstants) // Otherwise, it's better to do a constpool load. @@ -9316,7 +9350,8 @@ // place. 
if (EVTBits == 32) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG); + return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, + DAG); } } @@ -9341,7 +9376,7 @@ return SDValue(); if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget)) - return V; + return V; // See if we can use a vector load to get all of the elements. { @@ -9355,8 +9390,8 @@ // build_vector and broadcast it. // TODO: We could probably generalize this more. if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { - SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), - DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), + DAG.getUNDEF(EltVT)}; auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { // Make sure all the even/odd operands match. for (unsigned i = 2; i != NumElems; ++i) @@ -9371,22 +9406,22 @@ SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), DAG.getBuildVector(NarrowVT, dl, Ops)); // Broadcast from v2i64/v2f64 and cast to final VT. - MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, - NewBV)); + MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, NewBV)); } } // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. if (VT.getSizeInBits() > 128) { - MVT HVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2); // Build both the lower and upper subvector. SDValue Lower = DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2)); SDValue Upper = DAG.getBuildVector( - HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); + HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2)); // Recreate the wider vector with the lower and upper part. return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, @@ -9398,8 +9433,8 @@ if (NumNonZero == 1) { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); - SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, - Op.getOperand(Idx)); + SDValue V2 = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); @@ -9433,31 +9468,29 @@ } for (unsigned i = 0; i < 2; ++i) { - switch ((NonZeros >> (i*2)) & 0x3) { - default: llvm_unreachable("Unexpected NonZero count"); - case 0: - Ops[i] = Ops[i*2]; // Must be a zero vector. - break; - case 1: - Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]); - break; - case 2: - Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); - break; - case 3: - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); - break; + switch ((NonZeros >> (i * 2)) & 0x3) { + default: + llvm_unreachable("Unexpected NonZero count"); + case 0: + Ops[i] = Ops[i * 2]; // Must be a zero vector. + break; + case 1: + Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2 + 1], Ops[i * 2]); + break; + case 2: + Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]); + break; + case 3: + Ops[i] = getUnpackl(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]); + break; } } bool Reverse1 = (NonZeros & 0x3) == 2; bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; - int MaskVec[] = { - Reverse1 ? 1 : 0, - Reverse1 ? 0 : 1, - static_cast(Reverse2 ? NumElems+1 : NumElems), - static_cast(Reverse2 ? 
NumElems : NumElems+1) - }; + int MaskVec[] = {Reverse1 ? 1 : 0, Reverse1 ? 0 : 1, + static_cast(Reverse2 ? NumElems + 1 : NumElems), + static_cast(Reverse2 ? NumElems : NumElems + 1)}; return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec); } @@ -9476,7 +9509,8 @@ Result = DAG.getUNDEF(VT); for (unsigned i = 1; i < NumElems; ++i) { - if (Op.getOperand(i).isUndef()) continue; + if (Op.getOperand(i).isUndef()) + continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } @@ -9501,14 +9535,14 @@ for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { // Generate scaled UNPCKL shuffle mask. SmallVector Mask; - for(unsigned i = 0; i != Scale; ++i) + for (unsigned i = 0; i != Scale; ++i) Mask.push_back(i); for (unsigned i = 0; i != Scale; ++i) - Mask.push_back(NumElems+i); + Mask.push_back(NumElems + i); Mask.append(NumElems - Mask.size(), SM_SentinelUndef); for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) - Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2 * i], Ops[(2 * i) + 1], Mask); } return Ops[0]; } @@ -9521,8 +9555,8 @@ SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); - assert((ResVT.is256BitVector() || - ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); + assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && + "Value type must be 256-/512-bit wide"); unsigned NumOperands = Op.getNumOperands(); unsigned NumZero = 0; @@ -9544,18 +9578,18 @@ // If we have more than 2 non-zeros, build each half separately. if (NumNonZero > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + ResVT.getVectorNumElements() / 2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(0, NumOperands/2)); + Ops.slice(0, NumOperands / 2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(NumOperands/2)); + Ops.slice(NumOperands / 2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } // Otherwise, build it up through insert_subvectors. - SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) - : DAG.getUNDEF(ResVT); + SDValue Vec = + NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) : DAG.getUNDEF(ResVT); MVT SubVT = Op.getOperand(0).getSimpleValueType(); unsigned NumSubElems = SubVT.getVectorNumElements(); @@ -9563,8 +9597,7 @@ if ((NonZeros & (1 << i)) == 0) continue; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, - Op.getOperand(i), + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(i), DAG.getIntPtrConstant(i * NumSubElems, dl)); } @@ -9628,7 +9661,7 @@ // TODO: Merge this with LowerAVXCONCAT_VECTORS? static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG & DAG) { + SelectionDAG &DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); unsigned NumOperands = Op.getNumOperands(); @@ -9659,7 +9692,6 @@ } } - // If there are zero or one non-zeros we can handle this very simply. if (NumNonZero <= 1) { SDValue Vec = NumZero ? 
getZeroVector(ResVT, Subtarget, DAG, dl) @@ -9675,12 +9707,12 @@ if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + ResVT.getVectorNumElements() / 2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(0, NumOperands/2)); + Ops.slice(0, NumOperands / 2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(NumOperands/2)); + Ops.slice(NumOperands / 2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } @@ -9689,24 +9721,23 @@ if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK - SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, - DAG.getUNDEF(ResVT), Op.getOperand(0), - DAG.getIntPtrConstant(0, dl)); + SDValue Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, DAG.getUNDEF(ResVT), + Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); unsigned NumElems = ResVT.getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), - DAG.getIntPtrConstant(NumElems/2, dl)); + DAG.getIntPtrConstant(NumElems / 2, dl)); } -static SDValue LowerCONCAT_VECTORS(SDValue Op, - const X86Subtarget &Subtarget, +static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() == MVT::i1) return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); assert((VT.is256BitVector() && Op.getNumOperands() == 2) || - (VT.is512BitVector() && (Op.getNumOperands() == 2 || - Op.getNumOperands() == 4))); + (VT.is512BitVector() && + (Op.getNumOperands() == 2 || Op.getNumOperands() == 4))); // AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -9783,8 +9814,8 @@ // Ok, handle the in-lane shuffles by detecting if and when they repeat. // Adjust second vector indices to start at LaneSize instead of Size. - int LocalM = Mask[i] < Size ? Mask[i] % LaneSize - : Mask[i] % LaneSize + LaneSize; + int LocalM = + Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize; if (RepeatedMask[i % LaneSize] < 0) // This is the first non-undef entry in this slot of a 128-bit lane. RepeatedMask[i % LaneSize] = LocalM; @@ -9802,8 +9833,7 @@ return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); } -static bool -is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask) { +static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask) { SmallVector RepeatedMask; return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); } @@ -9895,7 +9925,8 @@ /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// -/// SM_SentinelZero is accepted as a valid negative index but must match in both. +/// SM_SentinelZero is accepted as a valid negative index but must match in +/// both. static bool isTargetShuffleEquivalent(ArrayRef Mask, ArrayRef ExpectedMask) { int Size = Mask.size(); @@ -10040,8 +10071,8 @@ /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. 
-static APInt computeZeroableShuffleElements(ArrayRef Mask, - SDValue V1, SDValue V2) { +static APInt computeZeroableShuffleElements(ArrayRef Mask, SDValue V1, + SDValue V2) { APInt Zeroable(Mask.size(), 0); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); @@ -10117,8 +10148,8 @@ // // The function looks for a sub-mask that the nonzero elements are in // increasing order. If such sub-mask exist. The function returns true. -static bool isNonZeroElementsInOrder(const APInt &Zeroable, - ArrayRef Mask, const EVT &VectorType, +static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef Mask, + const EVT &VectorType, bool &IsZeroSideLeft) { int NextElement = -1; // Check if the Mask's nonzero elements are in increasing order. @@ -10202,9 +10233,8 @@ // X86 has dedicated shuffle that can be lowered to VEXPAND static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT, - const APInt &Zeroable, - ArrayRef Mask, SDValue &V1, - SDValue &V2, SelectionDAG &DAG, + const APInt &Zeroable, ArrayRef Mask, + SDValue &V1, SDValue &V2, SelectionDAG &DAG, const X86Subtarget &Subtarget) { bool IsLeftZeroSide = true; if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(), @@ -10337,7 +10367,7 @@ } static bool matchVectorShuffleAsVPMOV(ArrayRef Mask, bool SwappedOps, - int Delta) { + int Delta) { int Size = (int)Mask.size(); int Split = Size / Delta; int TruncatedVectorStart = SwappedOps ? Size : 0; @@ -10753,9 +10783,9 @@ for (int i = 0, Size = Mask.size(); i < Size; ++i) for (int j = 0; j < Scale; ++j) VSELECTMask.push_back( - Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) - : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, - MVT::i8)); + Mask[i] < 0 + ? DAG.getUNDEF(MVT::i8) + : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); @@ -10897,9 +10927,11 @@ /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then /// permuting the elements of the result in place. -static SDValue lowerShuffleAsByteRotateAndPermute( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || (VT.is256BitVector() && !Subtarget.hasAVX2()) || (VT.is512BitVector() && !Subtarget.hasBWI())) @@ -11012,18 +11044,18 @@ // pre-shuffle first is a better strategy. if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) { // Only prefer immediate blends to unpack/rotate. - if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, - DAG, true)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG, true)) return BlendPerm; - if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, - DAG)) + if (SDValue UnpackPerm = + lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG)) return UnpackPerm; if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute( DL, VT, V1, V2, Mask, Subtarget, DAG)) return RotatePerm; // Unpack/rotate failed - try again with variable blends. 
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, - DAG)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG)) return BlendPerm; } @@ -11049,7 +11081,7 @@ SDValue Lo, Hi; for (int i = 0; i < NumElts; ++i) { int M = Mask[i]; - assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) && + assert((M == SM_SentinelUndef || (0 <= M && M < (2 * NumElts))) && "Unexpected mask index."); if (M < 0) continue; @@ -11171,8 +11203,7 @@ "Rotate-based lowering only supports 128-bit lowering!"); assert(Mask.size() <= 16 && "Can shuffle at most 16 bytes in a 128-bit vector!"); - assert(ByteVT == MVT::v16i8 && - "SSE2 rotate lowering only needed for v16i8!"); + assert(ByteVT == MVT::v16i8 && "SSE2 rotate lowering only needed for v16i8!"); // Default SSE2 implementation int LoByteShift = 16 - ByteRotation; @@ -11204,8 +11235,9 @@ "Only 32-bit and 64-bit elements are supported!"); // 128/256-bit vectors are only supported with VLX. - assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) - && "VLX required for 128/256-bit vectors"); + assert( + (Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) && + "VLX required for 128/256-bit vectors"); SDValue Lo = V1, Hi = V2; int Rotation = matchShuffleAsRotate(Lo, Hi, Mask); @@ -11592,7 +11624,7 @@ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); - InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG); + InputV = getExtendInVec(/*Signed*/ false, DL, ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } @@ -11708,8 +11740,7 @@ /// are both incredibly common and often quite performance sensitive. static SDValue lowerShuffleAsZeroOrAnyExtend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Bits = VT.getSizeInBits(); int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); @@ -11834,7 +11865,8 @@ // If the bitcasts shift the element size, we can't extract an equivalent // element from it. MVT NewVT = V.getSimpleValueType(); - if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) + if (!NewVT.isVector() || + NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); if (V.getOpcode() == ISD::BUILD_VECTOR || @@ -11864,8 +11896,7 @@ /// across all subtarget feature sets. static SDValue lowerShuffleAsElementInsertion( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); @@ -11884,8 +11915,8 @@ // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. - SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), - DAG); + SDValue V2S = + getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG); if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. 
V2S = DAG.getBitcast(EltVT, V2S); @@ -12077,8 +12108,8 @@ NewMask.append(NumElts, -1); // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0 - SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), - NewMask); + SDValue Shuf = + DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), NewMask); // This is free: ymm -> xmm. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf, DAG.getIntPtrConstant(0, DL)); @@ -12282,8 +12313,8 @@ // elements are zeroable. static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, unsigned &InsertPSMask, - const APInt &Zeroable, - ArrayRef Mask, SelectionDAG &DAG) { + const APInt &Zeroable, ArrayRef Mask, + SelectionDAG &DAG) { assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); @@ -12393,13 +12424,14 @@ /// because for floating point vectors we have a generalized SHUFPS lowering /// strategy that handles everything that doesn't *exactly* match an unpack, /// making this clever lowering unnecessary. -static SDValue lowerShuffleAsPermuteAndUnpack( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "This routine only supports integer vectors."); - assert(VT.is128BitVector() && - "This routine only works on 128-bit vectors."); + assert(VT.is128BitVector() && "This routine only works on 128-bit vectors."); assert(!V2.isUndef() && "This routine should only be used when blending two inputs."); assert(Mask.size() >= 2 && "Single element masks are invalid."); @@ -12447,7 +12479,8 @@ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); // Cast the inputs to the type we will use to unpack them. - MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale); + MVT UnpackVT = + MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale); V1 = DAG.getBitcast(UnpackVT, V1); V2 = DAG.getBitcast(UnpackVT, V2); @@ -12493,8 +12526,9 @@ 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1); } return DAG.getVectorShuffle( - VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, - DL, VT, V1, V2), + VT, DL, + DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT, + V1, V2), DAG.getUNDEF(VT), PermMask); } @@ -12833,8 +12867,8 @@ // when the V2 input is targeting element 0 of the mask -- that is the fast // case here. if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; if (Subtarget.hasSSE41()) { @@ -12847,8 +12881,8 @@ return V; if (!isSingleSHUFPSMask(Mask)) - if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, - V2, Mask, DAG)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, V2, Mask, DAG)) return BlendPerm; } @@ -12923,8 +12957,8 @@ // There are special ways we can lower some single-element blends. 
if (NumV2Elements == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the @@ -13056,7 +13090,7 @@ }; if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) { - int PSHUFDMask[4] = { -1, -1, -1, -1 }; + int PSHUFDMask[4] = {-1, -1, -1, -1}; SmallVector, 4> DWordPairs; int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2); @@ -13152,7 +13186,8 @@ int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); int TripleNonInputIdx = - TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); + TripleInputSum - + std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); TripleDWord = TripleNonInputIdx / 2; // We use xor with one to compute the adjacent DWord to whichever one the @@ -13232,9 +13267,9 @@ // Adjust the mask to match the new locations of A and B. for (int &M : Mask) - if (M >= 0 && M/2 == ADWord) + if (M >= 0 && M / 2 == ADWord) M = 2 * BDWord + M % 2; - else if (M >= 0 && M/2 == BDWord) + else if (M >= 0 && M / 2 == BDWord) M = 2 * ADWord + M % 2; // Recurse back into this routine to re-compute state now that this isn't @@ -13258,10 +13293,11 @@ // First fix the masks for all the inputs that are staying in their // original halves. This will then dictate the targets of the cross-half // shuffles. - auto fixInPlaceInputs = - [&PSHUFDMask](ArrayRef InPlaceInputs, ArrayRef IncomingInputs, - MutableArrayRef SourceHalfMask, - MutableArrayRef HalfMask, int HalfOffset) { + auto fixInPlaceInputs = [&PSHUFDMask](ArrayRef InPlaceInputs, + ArrayRef IncomingInputs, + MutableArrayRef SourceHalfMask, + MutableArrayRef HalfMask, + int HalfOffset) { if (InPlaceInputs.empty()) return; if (InPlaceInputs.size() == 1) { @@ -13297,10 +13333,12 @@ // FIXME: This operation could almost certainly be simplified dramatically to // look more like the 3-1 fixing operation. auto moveInputsToRightHalf = [&PSHUFDMask]( - MutableArrayRef IncomingInputs, ArrayRef ExistingInputs, - MutableArrayRef SourceHalfMask, MutableArrayRef HalfMask, - MutableArrayRef FinalSourceHalfMask, int SourceOffset, - int DestOffset) { + MutableArrayRef IncomingInputs, + ArrayRef ExistingInputs, + MutableArrayRef SourceHalfMask, + MutableArrayRef HalfMask, + MutableArrayRef FinalSourceHalfMask, + int SourceOffset, int DestOffset) { auto isWordClobbered = [](ArrayRef SourceHalfMask, int Word) { return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word; }; @@ -13497,9 +13535,11 @@ /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the /// blend if only one input is used. -static SDValue lowerShuffleAsBlendOfPSHUFBs( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { +static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG, bool &V1InUse, + bool &V2InUse) { assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && "Lane crossing shuffle masks not supported"); @@ -13592,8 +13632,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. 
- if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. @@ -13618,14 +13658,14 @@ // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = + lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG)) return V; // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the @@ -13645,8 +13685,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. @@ -13672,14 +13712,14 @@ // can both shuffle and set up the inefficient blend. if (!IsBlendSupported && Subtarget.hasSSSE3()) { bool V1InUse, V2InUse; - return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG, V1InUse, V2InUse); + return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, Zeroable, + DAG, V1InUse, V2InUse); } // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, Mask, + Subtarget, DAG); } /// Check whether a compaction lowering can be done by dropping even @@ -13749,8 +13789,8 @@ } static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { + ArrayRef Mask, SDValue V1, SDValue V2, + SelectionDAG &DAG) { MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); @@ -13787,8 +13827,8 @@ return Rotate; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use a zext lowering. @@ -13798,8 +13838,8 @@ // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = + lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG)) return V; int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); @@ -13851,7 +13891,7 @@ int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; SmallDenseMap LaneMap; for (int I : InPlaceInputs) { - PreDupI16Shuffle[I/2] = I/2; + PreDupI16Shuffle[I / 2] = I / 2; LaneMap[I] = I; } int j = TargetLo ? 0 : 4, je = j + 4; @@ -13865,7 +13905,8 @@ ++j; if (j == je) - // We can't place the inputs into a single half with a simple i16 shuffle, so bail. + // We can't place the inputs into a single half with a simple i16 + // shuffle, so bail. return SDValue(); // Map this input with the i16 shuffle. 
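For context on the lowerShuffleAsBlendOfPSHUFBs signature reformatted in one of the hunks above: the idea is to shuffle each input with PSHUFB using a per-byte mask in which bytes belonging to the other input are set to 0x80 (PSHUFB zeroes any byte whose mask has bit 7 set), then OR the two results. A hedged stand-alone model of the 16-byte case follows; the helper names are illustrative, not from the patch, and undef mask entries are simplified to zero bytes:

#include <array>
#include <cstdint>

using Bytes = std::array<uint8_t, 16>;

// Software model of PSHUFB: a mask byte with bit 7 set produces 0, otherwise
// its low 4 bits index into V.
static Bytes pshufb(const Bytes &V, const Bytes &M) {
  Bytes R{};
  for (int i = 0; i < 16; ++i)
    R[i] = (M[i] & 0x80) ? 0 : V[M[i] & 0x0F];
  return R;
}

static Bytes blendOfPshufbs(const Bytes &V1, const Bytes &V2,
                            const std::array<int, 16> &Mask) {
  Bytes M1, M2, Out{};
  for (int i = 0; i < 16; ++i) {
    int M = Mask[i];
    M1[i] = (M >= 0 && M < 16) ? (uint8_t)M : 0x80;  // bytes taken from V1
    M2[i] = (M >= 16) ? (uint8_t)(M - 16) : 0x80;    // bytes taken from V2
  }
  Bytes R1 = pshufb(V1, M1), R2 = pshufb(V2, M2);
  for (int i = 0; i < 16; ++i)
    Out[i] = R1[i] | R2[i];                          // blend by OR-ing
  return Out;
}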
@@ -13964,8 +14005,8 @@ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the // PALIGNR will be cheaper than the second PSHUFB+OR. - if (SDValue V = lowerShuffleAsByteRotateAndPermute( - DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v16i8, V1, V2, + Mask, Subtarget, DAG)) return V; } @@ -13974,8 +14015,8 @@ // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) @@ -13999,7 +14040,7 @@ "No support for dropping even elements more than 3 times."); // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. - MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; + MVT MaskVTs[] = {MVT::v8i16, MVT::v4i32, MVT::v2i64}; SDValue ByteClearMask = DAG.getBitcast( MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); @@ -14056,8 +14097,8 @@ if (M >= 0) M /= 2; } else { - // Otherwise just unpack the low half of V into VLoHalf and the high half into - // VHiHalf so that we can blend them as i16s. + // Otherwise just unpack the low half of V into VLoHalf and the high half + // into VHiHalf so that we can blend them as i16s. SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL); VLoHalf = DAG.getBitcast( @@ -14066,8 +14107,10 @@ MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } - SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); - SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); + SDValue LoV = + DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); + SDValue HiV = + DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); } @@ -14076,9 +14119,8 @@ /// /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. -static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { @@ -14197,8 +14239,7 @@ SDValue V1Blend, V2Blend; if (UseLoV1 && UseHiV1) { - V1Blend = - DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); + V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); } else { // We only use half of V1 so map the usage down into the final blend mask. V1Blend = UseLoV1 ? LoV1 : HiV1; @@ -14207,8 +14248,7 @@ BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); } if (UseLoV2 && UseHiV2) { - V2Blend = - DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); + V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); } else { // We only use half of V2 so map the usage down into the final blend mask. V2Blend = UseLoV2 ? 
LoV2 : HiV2; @@ -14236,7 +14276,7 @@ const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This routine must not be used to lower single-input " - "shuffles as it could then recurse on itself."); + "shuffles as it could then recurse on itself."); int Size = Mask.size(); // If this can be modeled as a broadcast of two elements followed by a blend, @@ -14259,8 +14299,8 @@ return true; }; if (DoBothBroadcast()) - return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, + DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to @@ -14387,16 +14427,17 @@ SmallVector FlippedBlendMask(Size); for (int i = 0; i < Size; ++i) FlippedBlendMask[i] = - Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) - ? Mask[i] - : Mask[i] % LaneSize + - (i / LaneSize) * LaneSize + Size); + Mask[i] < 0 + ? -1 + : (((Mask[i] % Size) / LaneSize == i / LaneSize) + ? Mask[i] + : Mask[i] % LaneSize + (i / LaneSize) * LaneSize + Size); // Flip the vector, and blend the results which should now be in-lane. MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; SDValue Flipped = DAG.getBitcast(PVT, V1); - Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), - { 2, 3, 0, 1 }); + Flipped = + DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1}); Flipped = DAG.getBitcast(VT, Flipped); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } @@ -14433,8 +14474,8 @@ // instruction bytes needed to explicitly generate the zero vector. // Blends are faster and handle all the non-lane-crossing cases. - if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, - Subtarget, DAG)) + if (SDValue Blend = + lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // If either input operand is a zero vector, use VPERM2X128 because its mask @@ -14449,9 +14490,9 @@ // this will likely become vinsertf128 which can't fold a 256-bit memop. if (!isa(peekThroughBitcasts(V1))) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); - SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); + SDValue SubVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(2, DL)); } @@ -14460,10 +14501,10 @@ // Try to use SHUF128 if possible. if (Subtarget.hasVLX()) { if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { - unsigned PermMask = ((WidenedMask[0] % 2) << 0) | - ((WidenedMask[1] % 2) << 1); - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + unsigned PermMask = + ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getConstant(PermMask, DL, MVT::i8)); } } } @@ -14485,7 +14526,7 @@ (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?"); unsigned PermMask = 0; - PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); + PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); // Check the immediate mask and replace unused sources with undef. @@ -14522,7 +14563,7 @@ // First pass will try to fill in the RepeatMask from lanes that need two // sources. 
for (int Lane = 0; Lane != NumLanes; ++Lane) { - int Srcs[2] = { -1, -1 }; + int Srcs[2] = {-1, -1}; SmallVector InLaneMask(LaneSize, -1); for (int i = 0; i != LaneSize; ++i) { int M = Mask[(Lane * LaneSize) + i]; @@ -14673,9 +14714,9 @@ /// adjusted to access the extracted halves of the original shuffle operands is /// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or /// lower half of each input operand is accessed. -static bool -getHalfShuffleMask(ArrayRef Mask, MutableArrayRef HalfMask, - int &HalfIdx1, int &HalfIdx2) { +static bool getHalfShuffleMask(ArrayRef Mask, + MutableArrayRef HalfMask, int &HalfIdx1, + int &HalfIdx2) { assert((Mask.size() == HalfMask.size() * 2) && "Expected input mask to be twice as long as output"); @@ -15090,7 +15131,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { - assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& + assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; @@ -15204,8 +15245,8 @@ Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 4-lane 64-bit integer shuffles. @@ -15230,8 +15271,8 @@ return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (V2.isUndef()) { @@ -15261,8 +15302,8 @@ // If we have VLX support, we can use VALIGN or VEXPAND. if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, @@ -15323,8 +15364,8 @@ return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane, we have many more @@ -15370,8 +15411,8 @@ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, DAG, + Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -15401,8 +15442,8 @@ Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 8-lane 32-bit integer shuffles. @@ -15439,8 +15480,8 @@ return Blend; // Check for being able to broadcast a single element. 
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane we can use more @@ -15467,8 +15508,8 @@ // If we have VLX support, we can use VALIGN or EXPAND. if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, @@ -15550,8 +15591,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. @@ -15587,8 +15628,8 @@ // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v16 case. - return lowerV8I16GeneralSingleInputShuffle( - DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v16i16, V1, + RepeatedMask, Subtarget, DAG); } } @@ -15607,13 +15648,13 @@ return Result; // Try to permute the lanes and then use a per-lane permute. - if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v16i16, V1, V2, + Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 32-lane 8-bit integer shuffles. @@ -15637,8 +15678,8 @@ return ZExt; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, @@ -15650,13 +15691,13 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -15673,8 +15714,8 @@ // There are no generalized cross-lane shuffle operations available on i8 // element types. if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) { - if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2, + Mask, DAG, Subtarget)) return V; return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, @@ -15696,13 +15737,13 @@ return Result; // Try to permute the lanes and then use a per-lane permute. 
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2, + Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, + DAG); } /// High-level routine to lower various 256-bit x86 vector shuffles. @@ -15805,14 +15846,13 @@ // Check for patterns which can be matched with a single insert of a 256-bit // subvector. - bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, - {0, 1, 2, 3, 0, 1, 2, 3}); - if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, - {0, 1, 2, 3, 8, 9, 10, 11})) { + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 0, 1, 2, 3}); + if (OnlyUsesV1 || + isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); - SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); + SDValue SubVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(4, DL)); } @@ -15966,15 +16006,14 @@ // If we have a single input shuffle with different shuffle patterns in the // 128-bit lanes and don't lane cross, use variable mask VPERMILPS. - if (V2.isUndef() && - !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { + if (V2.isUndef() && !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true); return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask); } // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); @@ -16020,8 +16059,8 @@ return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use PALIGNR. @@ -16082,8 +16121,8 @@ return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use byte rotation instructions. @@ -16149,13 +16188,13 @@ // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v32 case. - return lowerV8I16GeneralSingleInputShuffle( - DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1, + RepeatedMask, Subtarget, DAG); } } if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, @@ -16178,8 +16217,8 @@ // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. 
It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( - DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. @@ -16187,8 +16226,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. @@ -16235,13 +16274,11 @@ /// This routine either breaks down the specific type of a 512-bit x86 vector /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. -static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget.hasAVX512() && - "Cannot lower 512-bit vectors w/ basic ISA!"); + assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. @@ -16259,8 +16296,8 @@ return V; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // Dispatch to each element type for lowering. If we don't have support for @@ -16318,14 +16355,12 @@ return -1; } - // Lower vXi1 vector shuffles. // There is no a dedicated instruction on AVX-512 that shuffles the masks. // The only way to shuffle bits is to sign-extend the mask vector to SIMD // vector, shuffle and then truncate it back. -static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && @@ -16350,18 +16385,18 @@ // not covered by the subvector. if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, - V1, DAG.getIntPtrConstant(0, DL)); + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), - Extract, DAG.getIntPtrConstant(0, DL)); + getZeroVector(VT, Subtarget, DAG, DL), Extract, + DAG.getIntPtrConstant(0, DL)); } // Try to match KSHIFTs. // TODO: Support narrower than legal shifts by widening and extracting. 
if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) { unsigned Offset = 0; - for (SDValue V : { V1, V2 }) { + for (SDValue V : {V1, V2}) { unsigned Opcode; int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); if (ShiftAmt >= 0) @@ -16371,7 +16406,6 @@ } } - MVT ExtVT; switch (VT.SimpleTy) { default: @@ -16411,8 +16445,8 @@ int NumElems = VT.getVectorNumElements(); if ((Subtarget.hasBWI() && (NumElems >= 32)) || (Subtarget.hasDQI() && (NumElems < 32))) - return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), - Shuffle, ISD::SETGT); + return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), Shuffle, + ISD::SETGT); return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); } @@ -16526,7 +16560,8 @@ } // Check for illegal shuffle mask element index values. - int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit; + int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); + (void)MaskUpperLimit; assert(llvm::all_of(Mask, [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && "Out of bounds shuffle index"); @@ -16561,8 +16596,8 @@ // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; MVT NewEltVT = VT.isFloatingPoint() @@ -16677,8 +16712,7 @@ // Build a mask by testing the condition against zero. MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond, - DAG.getConstant(0, dl, CondVT), - ISD::SETNE); + DAG.getConstant(0, dl, CondVT), ISD::SETNE); // Now return a new VSELECT using the mask. return DAG.getSelect(dl, VT, Mask, LHS, RHS); } @@ -16745,8 +16779,7 @@ if (!Op.hasOneUse()) return SDValue(); SDNode *User = *Op.getNode()->use_begin(); - if ((User->getOpcode() != ISD::STORE || - isNullConstant(Op.getOperand(1))) && + if ((User->getOpcode() != ISD::STORE || isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); @@ -16800,9 +16833,9 @@ MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, - DAG.getUNDEF(WideVecVT), Vec, - DAG.getIntPtrConstant(0, dl)); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Use kshiftr instruction to move to the lower element. 
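The lower1BitShuffle hunks above follow the strategy stated in that function's header comment: AVX-512 has no instruction that shuffles mask bits directly, so the mask is sign-extended to an ordinary SIMD vector, shuffled, and truncated back (here via a compare against zero). A rough scalar model of that round trip, assuming an 8-bit mask; everything below is illustrative and not part of the patch:

#include <array>
#include <cstdint>

static uint8_t shuffleV8i1(uint8_t Mask, const std::array<int, 8> &Idx) {
  std::array<int32_t, 8> Wide{};             // "sign-extend" each bit to a lane
  for (int i = 0; i < 8; ++i)
    Wide[i] = (Mask >> i) & 1 ? -1 : 0;
  std::array<int32_t, 8> Shuf{};             // ordinary element shuffle
  for (int i = 0; i < 8; ++i)
    Shuf[i] = Idx[i] < 0 ? 0 : Wide[Idx[i]]; // undef lanes become zero here
  uint8_t Out = 0;                           // "truncate" back to a bit mask
  for (int i = 0; i < 8; ++i)
    Out |= (Shuf[i] != 0) << i;
  return Out;
}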
@@ -16813,9 +16846,8 @@ DAG.getIntPtrConstant(0, dl)); } -SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); @@ -16850,10 +16882,10 @@ // | | Ports pressure in cycles | | // |Uops| 1 | 2 - D |3 - D | 4 | 5 | | // --------------------------------------------------------- - // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0 - // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] - // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] - // Total Num Of Uops: 4 + // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], + // xmm0 |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] |1 + // | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] Total Num + // Of Uops: 4 return SDValue(); } @@ -16933,7 +16965,7 @@ return Op; // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { static_cast(IdxVal), -1, -1, -1 }; + int Mask[4] = {static_cast(IdxVal), -1, -1, -1}; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); @@ -16949,7 +16981,7 @@ // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - int Mask[2] = { 1, -1 }; + int Mask[2] = {1, -1}; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); @@ -16974,9 +17006,10 @@ unsigned NumElts = VecVT.getVectorNumElements(); MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); - SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, - DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), - DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); + SDValue ExtOp = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); } @@ -17188,9 +17221,9 @@ MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, - DAG.getUNDEF(WideVecVT), Vec, - DAG.getIntPtrConstant(0, dl)); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Shift to the LSB. @@ -17202,8 +17235,9 @@ } // Returns the appropriate wrapper opcode for a global reference. -unsigned X86TargetLowering::getGlobalWrapperKind( - const GlobalValue *GV, const unsigned char OpFlags) const { +unsigned +X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV, + const unsigned char OpFlags) const { // References to absolute symbols are never PC-relative. if (GV && GV->isAbsoluteSymbolRef()) return X86ISD::Wrapper; @@ -17226,8 +17260,8 @@ // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. 
-SDValue -X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -17275,11 +17309,10 @@ return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -SDValue -X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. - unsigned char OpFlags = - Subtarget.classifyBlockAddressReference(); + unsigned char OpFlags = Subtarget.classifyBlockAddressReference(); const BlockAddress *BA = cast(Op)->getBlockAddress(); int64_t Offset = cast(Op)->getOffset(); SDLoc dl(Op); @@ -17369,31 +17402,30 @@ return Result; } -SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue -GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { +static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue *InFlag, + const EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags, + bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), - OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + X86ISD::NodeType CallType = + LocalDynamic ? X86ISD::TLSBASEADDR : X86ISD::TLSADDR; if (InFlag) { - SDValue Ops[] = { Chain, TGA, *InFlag }; + SDValue Ops[] = {Chain, TGA, *InFlag}; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } else { - SDValue Ops[] = { Chain, TGA }; + SDValue Ops[] = {Chain, TGA}; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } @@ -17406,36 +17438,35 @@ } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit -static SDValue -LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { +static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT) { SDValue InFlag; - SDLoc dl(GA); // ? function entry point might be better - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), PtrVT), InFlag); + SDLoc dl(GA); // ? 
function entry point might be better + SDValue Chain = DAG.getCopyToReg( + DAG.getEntryNode(), dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit -static SDValue -LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::RAX, X86II::MO_TLSGD); +static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, + X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG, - const EVT PtrVT, + SelectionDAG &DAG, const EVT PtrVT, bool is64Bit) { SDLoc dl(GA); // Get the start address of the TLS block for this module. - X86MachineFunctionInfo *MFI = DAG.getMachineFunction() - .getInfo(); + X86MachineFunctionInfo *MFI = + DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; @@ -17444,7 +17475,8 @@ X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InFlag; - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, + SDValue Chain = DAG.getCopyToReg( + DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, @@ -17457,9 +17489,8 @@ // Build x@dtpoff. unsigned char OperandFlags = X86II::MO_DTPOFF; unsigned WrapperKind = X86ISD::Wrapper; - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); // Add x@dtpoff with the base. @@ -17473,8 +17504,8 @@ SDLoc dl(GA); // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). - Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), - is64Bit ? 257 : 256)); + Value *Ptr = Constant::getNullValue( + Type::getInt8PtrTy(*DAG.getContext(), is64Bit ? 
257 : 256)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), @@ -17500,9 +17531,8 @@ // emit "addl x@ntpoff,%eax" (local exec) // or "addl x@indntpoff,%eax" (initial exec) // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic) - SDValue TGA = - DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), - GA->getOffset(), OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) { @@ -17521,8 +17551,8 @@ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } -SDValue -X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); @@ -17536,17 +17566,16 @@ if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { - case TLSModel::GeneralDynamic: - if (Subtarget.is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); - return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); - case TLSModel::LocalDynamic: - return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, - Subtarget.is64Bit()); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), - PositionIndependent); + case TLSModel::GeneralDynamic: + if (Subtarget.is64Bit()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); + case TLSModel::LocalDynamic: + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit()); + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), + PositionIndependent); } llvm_unreachable("Unknown TLS model."); } @@ -17554,8 +17583,8 @@ if (Subtarget.isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; - unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? - X86ISD::WrapperRIP : X86ISD::Wrapper; + unsigned WrapperKind = + Subtarget.isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. @@ -17565,9 +17594,8 @@ else OpFlag = X86II::MO_TLVP; SDLoc DL(Op); - SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, - GA->getValueType(0), - GA->getOffset(), OpFlag); + SDValue Result = DAG.getTargetGlobalAddress( + GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. 
@@ -17581,7 +17609,7 @@ SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); - SDValue Args[] = { Chain, Offset }; + SDValue Args[] = {Chain, Offset}; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), DAG.getIntPtrConstant(0, DL, true), @@ -17598,8 +17626,7 @@ } if (Subtarget.isTargetKnownWindowsMSVC() || - Subtarget.isTargetWindowsItanium() || - Subtarget.isTargetWindowsGNU()) { + Subtarget.isTargetWindowsItanium() || Subtarget.isTargetWindowsGNU()) { // Just use the implicit TLS architecture // Need to generate something similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -17617,11 +17644,9 @@ // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly // use its literal value of 0x2C. - Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() - ? Type::getInt8PtrTy(*DAG.getContext(), - 256) - : Type::getInt32PtrTy(*DAG.getContext(), - 257)); + Value *Ptr = Constant::getNullValue( + Subtarget.is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) + : Type::getInt32PtrTy(*DAG.getContext(), 257)); SDValue TlsArray = Subtarget.is64Bit() ? DAG.getIntPtrConstant(0x58, dl) @@ -17655,9 +17680,9 @@ res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo()); // Get the offset of start of .tls section - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), X86II::MO_SECREL); + SDValue TGA = + DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), + GA->getOffset(), X86II::MO_SECREL); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread @@ -17679,7 +17704,7 @@ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); + SDValue ShAmt = Op.getOperand(2); // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away // during isel. @@ -17704,7 +17729,7 @@ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, dl, MVT::i8)); SDValue Cond = DAG.getSetCC(dl, MVT::i8, AndNode, - DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); + DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); SDValue Hi, Lo; if (Op.getOpcode() == ISD::SHL_PARTS) { @@ -17715,7 +17740,7 @@ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } - return DAG.getMergeValues({ Lo, Hi }, dl); + return DAG.getMergeValues({Lo, Hi}, dl); } static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, @@ -17740,12 +17765,12 @@ APInt APIntShiftAmt; if (isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, - Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, + Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); } - return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, - Op0, Op1, Amt); + return DAG.getNode(IsFSHR ? 
X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, Op0, + Op1, Amt); } assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -17773,13 +17798,14 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert((Op.getOpcode() == ISD::SINT_TO_FP || - Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!"); + Op.getOpcode() == ISD::UINT_TO_FP) && + "Unexpected opcode!"); SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); - if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || - (VT != MVT::f32 && VT != MVT::f64)) + if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || + (VT != MVT::f32 && VT != MVT::f64)) return SDValue(); // Pack the i64 into a vector, do the operation and extract. @@ -17799,22 +17825,22 @@ static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, const X86Subtarget &Subtarget) { switch (Opcode) { - case ISD::SINT_TO_FP: - // TODO: Handle wider types with AVX/AVX512. - if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) - return false; - // CVTDQ2PS or (V)CVTDQ2PD - return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); - - case ISD::UINT_TO_FP: - // TODO: Handle wider types and i64 elements. - if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) - return false; - // VCVTUDQ2PS or VCVTUDQ2PD - return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; - - default: + case ISD::SINT_TO_FP: + // TODO: Handle wider types with AVX/AVX512. + if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) + return false; + // CVTDQ2PS or (V)CVTDQ2PD + return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); + + case ISD::UINT_TO_FP: + // TODO: Handle wider types and i64 elements. + if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) return false; + // VCVTUDQ2PS or VCVTUDQ2PD + return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; + + default: + return false; } } @@ -17893,14 +17919,13 @@ return V; SDValue ValueToStore = Op.getOperand(0); - if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && - !Subtarget.is64Bit()) + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); - unsigned Size = SrcVT.getSizeInBits()/8; + unsigned Size = SrcVT.getSizeInBits() / 8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false); @@ -17989,24 +18014,24 @@ LLVMContext *Context = DAG.getContext(); // Build some magic constants. 
- static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; + static const uint32_t CV0[] = {0x43300000, 0x45300000, 0, 0}; Constant *C0 = ConstantDataVector::get(*Context, CV0); auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); - SmallVector CV1; + SmallVector CV1; CV1.push_back( - ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), - APInt(64, 0x4330000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), + APInt(64, 0x4330000000000000ULL)))); CV1.push_back( - ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), - APInt(64, 0x4530000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), + APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, - Op.getOperand(0)); + SDValue XR1 = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), @@ -18027,7 +18052,7 @@ // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { - SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); + SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1, -1}); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub); } @@ -18040,12 +18065,12 @@ const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. - SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, - MVT::f64); + SDValue Bias = + DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // Load the 32-bit value into an XMM register. - SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, - Op.getOperand(0)); + SDValue Load = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(0)); // Zero out the upper parts of the register. Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); @@ -18102,7 +18127,7 @@ SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask); SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI); - fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); + fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO); // Add the two halves. @@ -18285,9 +18310,9 @@ MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); - SDValue Ops[] = { Store, StackSlot }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, - MVT::i64, MMO); + SDValue Ops[] = {Store, StackSlot}; + SDValue Fild = + DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -18325,9 +18350,8 @@ // Otherwise it is assumed to be a conversion from one of f32, f64 or f80 // to i16, i32 or i64, and we lower it to a legal sequence and return the // result. 
-SDValue -X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { +SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { SDLoc DL(Op); EVT DstTy = Op.getValueType(); @@ -18352,8 +18376,7 @@ DstTy = MVT::i64; } - assert(DstTy.getSimpleVT() <= MVT::i64 && - DstTy.getSimpleVT() >= MVT::i16 && + assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && "Unknown FP_TO_INT to lower!"); // We lower FP->int64 into FISTP64 followed by a load from a temporary @@ -18389,8 +18412,8 @@ bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. - Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &LosesInfo); + Status = Thresh.convert(APFloat::IEEEdouble(), + APFloat::rmNearestTiesToEven, &LosesInfo); else if (TheVT == MVT::f80) Status = Thresh.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &LosesInfo); @@ -18400,18 +18423,16 @@ SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); - SDValue Cmp = DAG.getSetCC(DL, - getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), TheVT), - Value, ThreshVal, ISD::SETLT); - Adjust = DAG.getSelect(DL, MVT::i64, Cmp, - DAG.getConstant(0, DL, MVT::i64), - DAG.getConstant(APInt::getSignMask(64), - DL, MVT::i64)); + SDValue Cmp = DAG.getSetCC( + DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Adjust = + DAG.getSelect(DL, MVT::i64, Cmp, DAG.getConstant(0, DL, MVT::i64), + DAG.getConstant(APInt::getSignMask(64), DL, MVT::i64)); SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); - Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), TheVT), - Value, ThreshVal, ISD::SETLT); + Cmp = DAG.getSetCC( + DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); } @@ -18423,7 +18444,7 @@ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI); SDVTList Tys = DAG.getVTList(TheVT, MVT::Other); - SDValue Ops[] = { Chain, StackSlot }; + SDValue Ops[] = {Chain, StackSlot}; unsigned FLDSize = TheVT.getStoreSize(); assert(FLDSize <= MemSize && "Stack slot not big enough"); @@ -18436,10 +18457,9 @@ // Build the FP_TO_INT*_IN_MEM MachineMemOperand *MMO = MF.getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemSize, MemSize); - SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL, - DAG.getVTList(MVT::Other), - Ops, DstTy, MMO); + SDValue Ops[] = {Chain, Value, StackSlot}; + SDValue FIST = DAG.getMemIntrinsicNode( + X86ISD::FP_TO_INT_IN_MEM, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI); @@ -18474,8 +18494,8 @@ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); - In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), - MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); + In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, + DAG.getUNDEF(MVT::v8i8)); // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input. 
return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In); } @@ -18530,9 +18550,8 @@ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); } -static SDValue LowerZERO_EXTEND_Mask(SDValue Op, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); @@ -18563,10 +18582,9 @@ if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); - In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), - In, DAG.getIntPtrConstant(0, DL)); - WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), - NumElts); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), In, + DAG.getIntPtrConstant(0, DL)); + WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } SDValue One = DAG.getConstant(1, DL, WideVT); @@ -18725,14 +18743,12 @@ if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { // We need to shift to get the lsb into sign position. // Shift packed bytes not supported natively, bitcast to word - MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); - In = DAG.getNode(ISD::SHL, DL, ExtVT, - DAG.getBitcast(ExtVT, In), + MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits() / 16); + In = DAG.getNode(ISD::SHL, DL, ExtVT, DAG.getBitcast(ExtVT, In), DAG.getConstant(ShiftInx, DL, ExtVT)); In = DAG.getBitcast(InVT, In); } - return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), - In, ISD::SETGT); + return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT); } // Use TESTD/Q, extended vector to packed dword/qword. assert((InVT.is256BitVector() || InVT.is128BitVector()) && @@ -18765,7 +18781,8 @@ // We either have 8 elements or we're allowed to use 512-bit vectors. // If we have VLX, we want to use the narrowest vector that can get the // job done so we use vXi32. - MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts); + MVT EltVT = + Subtarget.hasVLX() ? 
MVT::i32 : MVT::getIntegerVT(512 / NumElts); MVT ExtVT = MVT::getVectorVT(EltVT, NumElts); In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; @@ -18856,15 +18873,14 @@ In = DAG.getBitcast(MVT::v32i8, In); // The PSHUFB mask: - static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1, - 16, 17, 20, 21, 24, 25, 28, 29, - -1, -1, -1, -1, -1, -1, -1, -1 }; + static const int ShufMask1[] = { + 0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, + 16, 17, 20, 21, 24, 25, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1}; In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); In = DAG.getBitcast(MVT::v4i64, In); - static const int ShufMask2[] = {0, 2, -1, -1}; - In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); + static const int ShufMask2[] = {0, 2, -1, -1}; + In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); return DAG.getBitcast(VT, In); @@ -18880,8 +18896,8 @@ OpHi = DAG.getBitcast(MVT::v16i8, OpHi); // The PSHUFB mask: - static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; + static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1}; OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); @@ -18942,8 +18958,8 @@ TruncVT = MVT::v8i1; Opc = ISD::FP_TO_UINT; Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, - DAG.getUNDEF(MVT::v8f64), - Src, DAG.getIntPtrConstant(0, dl)); + DAG.getUNDEF(MVT::v8f64), Src, + DAG.getIntPtrConstant(0, dl)); } SDValue Res = DAG.getNode(Opc, dl, ResVT, Src); Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res); @@ -18952,7 +18968,7 @@ } assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); - if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { + if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32))); @@ -19001,9 +19017,9 @@ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, - In, DAG.getUNDEF(SVT))); + return DAG.getNode( + X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT))); } /// Horizontal vector math instructions may be slower than normal math with @@ -19046,12 +19062,20 @@ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB? unsigned HOpcode; switch (Op.getOpcode()) { - case ISD::ADD: HOpcode = X86ISD::HADD; break; - case ISD::SUB: HOpcode = X86ISD::HSUB; break; - case ISD::FADD: HOpcode = X86ISD::FHADD; break; - case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; - default: - llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); + case ISD::ADD: + HOpcode = X86ISD::HADD; + break; + case ISD::SUB: + HOpcode = X86ISD::HSUB; + break; + case ISD::FADD: + HOpcode = X86ISD::FHADD; + break; + case ISD::FSUB: + HOpcode = X86ISD::FHSUB; + break; + default: + llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); } unsigned LExtIndex = LHS.getConstantOperandVal(1); unsigned RExtIndex = RHS.getConstantOperandVal(1); @@ -19136,16 +19160,15 @@ unsigned EltBits = VT.getScalarSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... - APInt MaskElt = IsFABS ? 
APInt::getSignedMaxValue(EltBits) : - APInt::getSignMask(EltBits); + APInt MaskElt = + IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); - unsigned LogicOp = IsFABS ? X86ISD::FAND : - IsFNABS ? X86ISD::FOR : - X86ISD::FXOR; + unsigned LogicOp = + IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; if (VT.isVector() || IsF128) @@ -19171,7 +19194,8 @@ // And if it is bigger, shrink it first. if (Sign.getSimpleValueType().bitsGT(VT)) - Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl)); + Sign = + DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl)); // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. @@ -19222,8 +19246,9 @@ // OR the magnitude value with the sign bit. SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit); - return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, - DAG.getIntPtrConstant(0, dl)); + return !IsFakeVector ? Or + : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -19384,14 +19409,20 @@ bool NeedCF = false; bool NeedOF = false; switch (X86CC) { - default: break; - case X86::COND_A: case X86::COND_AE: - case X86::COND_B: case X86::COND_BE: + default: + break; + case X86::COND_A: + case X86::COND_AE: + case X86::COND_B: + case X86::COND_BE: NeedCF = true; break; - case X86::COND_G: case X86::COND_GE: - case X86::COND_L: case X86::COND_LE: - case X86::COND_O: case X86::COND_NO: { + case X86::COND_G: + case X86::COND_GE: + case X86::COND_L: + case X86::COND_LE: + case X86::COND_O: + case X86::COND_NO: { // Check if we really need to set the // Overflow flag. If NoSignedWrap is present // that is not actually needed. @@ -19442,20 +19473,31 @@ // using an RMW op or only the flags are used. Otherwise, leave // the node alone and emit a 'test' instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && - UI->getOpcode() != ISD::SETCC && + UE = Op.getNode()->use_end(); + UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC && UI->getOpcode() != ISD::STORE) goto default_case; // Otherwise use a regular EFLAGS-setting instruction. 
switch (ArithOp.getOpcode()) { - default: llvm_unreachable("unexpected operator!"); - case ISD::ADD: Opcode = X86ISD::ADD; break; - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - case ISD::OR: Opcode = X86ISD::OR; break; + default: + llvm_unreachable("unexpected operator!"); + case ISD::ADD: + Opcode = X86ISD::ADD; + break; + case ISD::SUB: + Opcode = X86ISD::SUB; + break; + case ISD::XOR: + Opcode = X86ISD::XOR; + break; + case ISD::AND: + Opcode = X86ISD::AND; + break; + case ISD::OR: + Opcode = X86ISD::OR; + break; } NumOperands = 2; @@ -19496,8 +19538,9 @@ if (CmpVT.isFloatingPoint()) return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); - assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || - CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!"); + assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || CmpVT == MVT::i32 || + CmpVT == MVT::i64) && + "Unexpected VT!"); // Only promote the compare up to I32 if it is a 16 bit operation // with an immediate. 16 bit immediates are to be avoided. @@ -19544,8 +19587,7 @@ SelectionDAG &DAG) const { // If the subtarget does not support the FUCOMI instruction, floating-point // comparisons have to be converted. - if (Subtarget.hasCMov() || - Cmp.getOpcode() != X86ISD::CMP || + if (Subtarget.hasCMov() || Cmp.getOpcode() != X86ISD::CMP || !Cmp.getOperand(0).getValueType().isFloatingPoint() || !Cmp.getOperand(1).getValueType().isFloatingPoint()) return Cmp; @@ -19581,9 +19623,8 @@ /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). -SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, - SelectionDAG &DAG, int Enabled, - int &RefinementSteps, +SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const { EVT VT = Op.getValueType(); @@ -19652,15 +19693,12 @@ /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. -unsigned X86TargetLowering::combineRepeatedFPDivisors() const { - return 2; -} +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; } /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. -static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC) { +static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, + SelectionDAG &DAG, SDValue &X86CC) { assert(And.getOpcode() == ISD::AND && "Expected AND node!"); SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); @@ -19701,8 +19739,8 @@ if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) && isPowerOf2_64(AndRHSVal)) { Src = AndLHS; - BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, - Src.getValueType()); + BitNo = + DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, Src.getValueType()); } } } @@ -19732,8 +19770,8 @@ if (Src.getValueType() != BitNo.getValueType()) BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, - dl, MVT::i8); + X86CC = DAG.getConstant(CC == ISD::SETEQ ? 
X86::COND_AE : X86::COND_B, dl, + MVT::i8); return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); } @@ -19754,27 +19792,56 @@ // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: llvm_unreachable("Unexpected SETCC condition"); + default: + llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: - case ISD::SETEQ: SSECC = 0; break; + case ISD::SETEQ: + SSECC = 0; + break; case ISD::SETOGT: - case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH; + case ISD::SETGT: + Swap = true; + LLVM_FALLTHROUGH; case ISD::SETLT: - case ISD::SETOLT: SSECC = 1; break; + case ISD::SETOLT: + SSECC = 1; + break; case ISD::SETOGE: - case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH; + case ISD::SETGE: + Swap = true; + LLVM_FALLTHROUGH; case ISD::SETLE: - case ISD::SETOLE: SSECC = 2; break; - case ISD::SETUO: SSECC = 3; break; + case ISD::SETOLE: + SSECC = 2; + break; + case ISD::SETUO: + SSECC = 3; + break; case ISD::SETUNE: - case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGT: SSECC = 6; break; - case ISD::SETO: SSECC = 7; break; - case ISD::SETUEQ: SSECC = 8; break; - case ISD::SETONE: SSECC = 12; break; + case ISD::SETNE: + SSECC = 4; + break; + case ISD::SETULE: + Swap = true; + LLVM_FALLTHROUGH; + case ISD::SETUGE: + SSECC = 5; + break; + case ISD::SETULT: + Swap = true; + LLVM_FALLTHROUGH; + case ISD::SETUGT: + SSECC = 6; + break; + case ISD::SETO: + SSECC = 7; + break; + case ISD::SETUEQ: + SSECC = 8; + break; + case ISD::SETONE: + SSECC = 12; + break; } if (Swap) std::swap(Op0, Op1); @@ -19806,7 +19873,7 @@ // Issue the operation on the smaller types and concatenate the result back MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); @@ -19975,10 +20042,10 @@ CombineOpc = X86ISD::FAND; } - SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, dl, MVT::i8)); - SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, dl, MVT::i8)); + SDValue Cmp0 = + DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC0, dl, MVT::i8)); + SDValue Cmp1 = + DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC1, dl, MVT::i8)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. @@ -20025,17 +20092,30 @@ // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; switch (Cond) { - default: llvm_unreachable("Unexpected SETCC condition"); + default: + llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: - case ISD::SETLT: CmpMode = 0x00; break; + case ISD::SETLT: + CmpMode = 0x00; + break; case ISD::SETULE: - case ISD::SETLE: CmpMode = 0x01; break; + case ISD::SETLE: + CmpMode = 0x01; + break; case ISD::SETUGT: - case ISD::SETGT: CmpMode = 0x02; break; + case ISD::SETGT: + CmpMode = 0x02; + break; case ISD::SETUGE: - case ISD::SETGE: CmpMode = 0x03; break; - case ISD::SETEQ: CmpMode = 0x04; break; - case ISD::SETNE: CmpMode = 0x05; break; + case ISD::SETGE: + CmpMode = 0x03; + break; + case ISD::SETEQ: + CmpMode = 0x04; + break; + case ISD::SETNE: + CmpMode = 0x05; + break; } // Are we comparing unsigned or signed integers? 
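// Editorial sketch (not part of the patch): the LowerAndToBT hunk above rewrites
// "(x & Mask) ==/!= 0" with a power-of-two Mask into a single-bit test (X86 BT,
// which copies the selected bit into CF, answered via COND_AE/COND_B). A
// standalone illustration of the scalar equivalence it relies on -- plain C++,
// not LLVM code:
#include <cassert>
#include <cstdint>

static bool testBitViaAnd(uint64_t X, unsigned BitNo) {
  return (X & (uint64_t(1) << BitNo)) != 0; // the pattern before the combine
}

static bool testBitViaBT(uint64_t X, unsigned BitNo) {
  return ((X >> BitNo) & 1) != 0;           // what BT computes (selected bit -> CF)
}

int main() {
  for (unsigned Bit = 0; Bit != 64; ++Bit)
    for (uint64_t X : {uint64_t(0), uint64_t(1), uint64_t(0x8000000000000000ULL),
                       uint64_t(0xDEADBEEFCAFEF00DULL)})
      assert(testBitViaAnd(X, Bit) == testBitViaBT(X, Bit));
  return 0;
}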
@@ -20131,11 +20211,20 @@ bool Invert = false; unsigned Opc; switch (Cond) { - default: llvm_unreachable("Unexpected condition code"); - case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Opc = ISD::UMIN; break; - case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: Opc = ISD::UMAX; break; + default: + llvm_unreachable("Unexpected condition code"); + case ISD::SETUGT: + Invert = true; + LLVM_FALLTHROUGH; + case ISD::SETULE: + Opc = ISD::UMIN; + break; + case ISD::SETULT: + Invert = true; + LLVM_FALLTHROUGH; + case ISD::SETUGE: + Opc = ISD::UMAX; + break; } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -20157,10 +20246,10 @@ // operations may be required for some comparisons. unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ : X86ISD::PCMPGT; - bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || - Cond == ISD::SETGE || Cond == ISD::SETUGE; - bool Invert = Cond == ISD::SETNE || - (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); + bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || Cond == ISD::SETGE || + Cond == ISD::SETUGE; + bool Invert = + Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); if (Swap) std::swap(Op0, Op1); @@ -20192,8 +20281,8 @@ SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); // Create masks for only the low parts/high parts of the 64 bit integers. - static const int MaskHi[] = { 1, 1, 3, 3 }; - static const int MaskLo[] = { 0, 0, 2, 2 }; + static const int MaskHi[] = {1, 1, 3, 3}; + static const int MaskLo[] = {0, 0, 2, 2}; SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); @@ -20220,7 +20309,7 @@ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. - static const int Mask[] = { 1, 0, 3, 2 }; + static const int Mask[] = {1, 0, 3, 2}; SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); @@ -20235,8 +20324,8 @@ // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, - VT); + SDValue SM = + DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } @@ -20253,8 +20342,7 @@ // Try to select this as a KORTEST+SETCC if possible. static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue &X86CC) { + const X86Subtarget &Subtarget, SDValue &X86CC) { // Only support equality comparisons. 
if (CC != ISD::SETEQ && CC != ISD::SETNE) return SDValue(); @@ -20354,7 +20442,8 @@ MVT VT = Op.getSimpleValueType(); - if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); + if (VT.isVector()) + return LowerVSETCC(Op, Subtarget, DAG); assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -20370,7 +20459,8 @@ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); } -SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); @@ -20383,8 +20473,8 @@ // Recreate the carry if needed. EVT CarryVT = Carry.getValueType(); APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), - Carry, DAG.getConstant(NegOne, DL, CarryVT)); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry, + DAG.getConstant(NegOne, DL, CarryVT)); SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); @@ -20404,7 +20494,8 @@ unsigned BaseOp = 0; SDLoc DL(Op); switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown ovf instruction!"); + default: + llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: BaseOp = X86ISD::ADD; Cond = X86::COND_O; @@ -20478,12 +20569,13 @@ SDValue VOp0 = V.getOperand(0); unsigned InBits = VOp0.getValueSizeInBits(); unsigned Bits = V.getValueSizeInBits(); - return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); + return DAG.MaskedValueIsZero(VOp0, + APInt::getHighBitsSet(InBits, InBits - Bits)); } SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool AddTest = true; - SDValue Cond = Op.getOperand(0); + SDValue Cond = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); @@ -20538,8 +20630,8 @@ SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - VSel, DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, + DAG.getIntPtrConstant(0, DL)); } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); @@ -20628,14 +20720,14 @@ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, CmpZero); } - Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, - CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); + Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, + DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); - SDValue Res = // Res = 0 or -1. - DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); + SDValue Res = // Res = 0 or -1. 
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); @@ -20670,7 +20762,8 @@ if (CmpSz > VT.getSizeInBits()) Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); else if (CmpSz < VT.getSizeInBits()) - Neg = DAG.getNode(ISD::AND, DL, VT, + Neg = DAG.getNode( + ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), DAG.getConstant(1, DL, VT)); else @@ -20692,8 +20785,7 @@ // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -20701,7 +20793,7 @@ bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && - !isScalarFPTypeInSSEReg(VT)) // FPStack? + !isScalarFPTypeInSSEReg(VT)) // FPStack? IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || @@ -20754,9 +20846,9 @@ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - Cond); + SDValue Res = + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; @@ -20766,14 +20858,15 @@ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate // widen the cmov and push the truncate through. This avoids introducing a new // branch during isel and doesn't add any extensions. - if (Op.getValueType() == MVT::i8 && - Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { + if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE && + Op2.getOpcode() == ISD::TRUNCATE) { SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); if (T1.getValueType() == T2.getValueType() && // Blacklist CopyFromReg to avoid partial register stalls. - T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ - SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, - CC, Cond); + T1.getOpcode() != ISD::CopyFromReg && + T2.getOpcode() != ISD::CopyFromReg) { + SDValue Cmov = + DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } @@ -20789,19 +20882,18 @@ !MayFoldLoad(Op2))) { Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); - SDValue Ops[] = { Op2, Op1, CC, Cond }; + SDValue Ops[] = {Op2, Op1, CC, Cond}; SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
- SDValue Ops[] = { Op2, Op1, CC, Cond }; + SDValue Ops[] = {Op2, Op1, CC, Cond}; return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } -static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, - const X86Subtarget &Subtarget, +static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); @@ -20827,8 +20919,8 @@ if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); - In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), - In, DAG.getIntPtrConstant(0, dl)); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), In, + DAG.getIntPtrConstant(0, dl)); WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } @@ -20907,9 +20999,9 @@ InVT = In.getSimpleValueType(); } - // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results, - // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still - // need to be handled here for 256/512-bit results. + // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit + // results, so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* + // instructions still need to be handled here for 256/512-bit results. if (Subtarget.hasInt256()) { assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension"); @@ -20918,9 +21010,8 @@ // FIXME: Apparently we create inreg operations that could be regular // extends. - unsigned ExtOpc = - Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; + unsigned ExtOpc = Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, VT, In); } @@ -21012,8 +21103,8 @@ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); - In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), - MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); + In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, + DAG.getUNDEF(MVT::v8i8)); return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In); } @@ -21035,9 +21126,9 @@ SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In); unsigned NumElems = InVT.getVectorNumElements(); - SmallVector ShufMask(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask[i] = i + NumElems/2; + SmallVector ShufMask(NumElems, -1); + for (unsigned i = 0; i != NumElems / 2; ++i) + ShufMask[i] = i + NumElems / 2; SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi); @@ -21123,7 +21214,7 @@ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && "Unexpected VT"); if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) != - TargetLowering::TypeWidenVector) + TargetLowering::TypeWidenVector) return SDValue(); // Widen the vector, cast to a v2x64 type, extract the single 64-bit element @@ -21151,11 +21242,10 @@ // TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. 
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector loads."); - assert(RegVT.isInteger() && - "We only custom lower integer vector loads."); + assert(RegVT.isInteger() && "We only custom lower integer vector loads."); LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); @@ -21190,8 +21280,8 @@ ISD::LoadExtType Ext = Ld->getExtensionType(); - assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) - && "Only anyext and sext are currently implemented."); + assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) && + "Only anyext and sext are currently implemented."); assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -21283,9 +21373,8 @@ // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegSize / MemVT.getScalarSizeInBits()); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + loadRegSize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -21297,8 +21386,8 @@ SmallVector Chains; SDValue Ptr = Ld->getBasePtr(); unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8; - SDValue Increment = DAG.getConstant(OffsetInc, dl, - TLI.getPointerTy(DAG.getDataLayout())); + SDValue Increment = + DAG.getConstant(OffsetInc, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); unsigned Offset = 0; @@ -21306,10 +21395,9 @@ unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset); // Perform a single load. - SDValue ScalarLoad = - DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(Offset), - NewAlign, Ld->getMemOperand()->getFlags()); + SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(Offset), + NewAlign, Ld->getMemOperand()->getFlags()); Chains.push_back(ScalarLoad.getValue(1)); // Create the first element type using SCALAR_TO_VECTOR in order to avoid // another round of DAGCombining. @@ -21331,13 +21419,13 @@ unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { - SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG); + SDValue Sext = getExtendInVec(/*Signed*/ true, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && MemVT == MVT::v8i8) { - SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG); + SDValue Sext = getExtendInVec(/*Signed*/ false, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } @@ -21380,8 +21468,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); - SDValue Cond = Op.getOperand(1); - SDValue Dest = Op.getOperand(2); + SDValue Cond = Op.getOperand(1); + SDValue Dest = Op.getOperand(2); SDLoc dl(Op); SDValue CC; bool Inverted = false; @@ -21422,8 +21510,7 @@ // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -21434,7 +21521,8 @@ addTest = false; } else { switch (cast(CC)->getZExtValue()) { - default: break; + default: + break; case X86::COND_O: case X86::COND_B: // These can only come from an arithmetic instruction with overflow, @@ -21466,11 +21554,10 @@ // Also, recognize the pattern generated by an FCMP_UNE. We can emit // two branches instead of an explicit OR instruction with a // separate test. - if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Cmp)) { + if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp)) { CC = Cond.getOperand(0).getOperand(0); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); CC = Cond.getOperand(1).getOperand(0); Cond = Cmp; addTest = false; @@ -21481,11 +21568,10 @@ // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. - if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Cmp) && + if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); @@ -21495,15 +21581,15 @@ if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cmp; @@ -21516,7 +21602,7 @@ // It should be transformed during dag combiner except when the condition // is set by a arithmetics with overflow node. 
X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); @@ -21536,7 +21622,7 @@ if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; @@ -21545,8 +21631,8 @@ Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; @@ -21557,12 +21643,12 @@ // For FCMP_UNE, we can emit // two branches instead of an explicit OR instruction with a // separate test. - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), + Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, + CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; @@ -21572,7 +21658,7 @@ if (addTest) { // Look pass the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) - Cond = Cond.getOperand(0); + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -21589,12 +21675,12 @@ if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; CC = DAG.getConstant(X86Cond, dl, MVT::i8); - Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), - X86Cond, dl, DAG); + Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), X86Cond, + dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); - return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cond); + return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, + Cond); } // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. @@ -21602,9 +21688,8 @@ // bytes in one go. Touching the stack at 4K increments is necessary to ensure // that the guard pages used by the OS virtual memory manager are allocated in // correct sequence. -SDValue -X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool EmitStackProbe = !getStackProbeSymbolName(MF).empty(); @@ -21615,7 +21700,7 @@ // Get the inputs. 
SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); + SDValue Size = Op.getOperand(1); unsigned Align = cast(Op.getOperand(2))->getZExtValue(); EVT VT = Node->getValueType(0); @@ -21640,7 +21725,7 @@ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Result = DAG.getNode(ISD::AND, dl, VT, Result, - DAG.getConstant(-(uint64_t)Align, dl, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -21660,7 +21745,7 @@ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, - DAG.getRegister(Vreg, SPTy)); + DAG.getRegister(Vreg, SPTy)); } else { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size); @@ -21734,8 +21819,9 @@ MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( - Subtarget.isTarget64BitLP64() ? 8 : 4, DL)); + FIN = DAG.getNode( + ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore( Op.getOperand(0), DL, RSFIN, FIN, @@ -21745,8 +21831,7 @@ } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget.is64Bit() && - "LowerVAARG only handles 64-bit va_arg!"); + assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert(Op.getNumOperands() == 4); MachineFunction &MF = DAG.getMachineFunction(); @@ -21771,9 +21856,9 @@ if (ArgVT == MVT::f80) { llvm_unreachable("va_arg for f80 not yet implemented"); } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) { - ArgMode = 2; // Argument passed in XMM register. Use fp_offset. + ArgMode = 2; // Argument passed in XMM register. Use fp_offset. } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) { - ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. + ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. } else { llvm_unreachable("Unhandled argument type in LowerVAARG"); } @@ -21792,11 +21877,8 @@ DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode( - X86ISD::VAARG_64, dl, - VTs, InstOps, MVT::i64, - MachinePointerInfo(SV), - /*Align=*/0, - MachineMemOperand::MOLoad | MachineMemOperand::MOStore); + X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), + /*Align=*/0, MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); // Load the next argument and return it @@ -21809,7 +21891,7 @@ // where a va_list is still an i8*. assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); if (Subtarget.isCallingConvWin64( - DAG.getMachineFunction().getFunction().getCallingConv())) + DAG.getMachineFunction().getFunction().getCallingConv())) // Probably a Win64 va_copy. 
return DAG.expandVACopy(Op.getNode()); @@ -21820,9 +21902,8 @@ const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, - false, false, + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL), + 8, /*isVolatile*/ false, false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -21869,8 +21950,9 @@ return DAG.getConstant(0, dl, VT); } - assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) - && "Unknown target vector shift-by-constant node"); + assert( + (Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) && + "Unknown target vector shift-by-constant node"); // Fold this packed vector shift into a build vector if SrcOp is a // vector of Constants or UNDEFs. @@ -21879,10 +21961,11 @@ unsigned NumElts = SrcOp->getNumOperands(); ConstantSDNode *ND; - switch(Opc) { - default: llvm_unreachable("Unknown opcode!"); + switch (Opc) { + default: + llvm_unreachable("Unknown opcode!"); case X86ISD::VSHLI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21894,7 +21977,7 @@ } break; case X86ISD::VSRLI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21906,7 +21989,7 @@ } break; case X86ISD::VSRAI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21979,8 +22062,8 @@ } else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); - ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), - MVT::v2i64, ShAmt); + ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, + ShAmt); } else { SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; @@ -22015,17 +22098,17 @@ // In case 32bit mode, bitcast i64 is illegal, extend/split it. SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(0, dl, MVT::i32)); + DAG.getConstant(0, dl, MVT::i32)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(1, dl, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); Lo = DAG.getBitcast(MVT::v32i1, Lo); Hi = DAG.getBitcast(MVT::v32i1, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); } else { - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); + MVT BitcastVT = + MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. 
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, @@ -22038,9 +22121,9 @@ /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + SDValue PreservedSrc, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); unsigned OpcodeSelect = ISD::VSELECT; @@ -22096,9 +22179,12 @@ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See // WinEHStatePass for the full struct definition. switch (classifyEHPersonality(Fn->getPersonalityFn())) { - case EHPersonality::MSVC_X86SEH: return 24; - case EHPersonality::MSVC_CXX: return 16; - default: break; + case EHPersonality::MSVC_X86SEH: + return 24; + case EHPersonality::MSVC_CXX: + return 16; + default: + break; } report_fatal_error( "can only recover FP for 32-bit MSVC EH personality functions"); @@ -22184,9 +22270,9 @@ SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); MVT VT = Op.getSimpleValueType(); - const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); + const IntrinsicData *IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { - switch(IntrData->Type) { + switch (IntrData->Type) { case INTR_TYPE_1OP: { // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, @@ -22202,7 +22288,8 @@ if (!isRoundModeCurDirection(Rnd)) return SDValue(); } - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1)); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1)); } case INTR_TYPE_1OP_SAE: { SDValue Sae = Op.getOperand(2); @@ -22276,12 +22363,13 @@ return SDValue(); } - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), - Src1, Src2, Src3); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, + Src3); } case INTR_TYPE_4OP: - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -22318,8 +22406,8 @@ else return SDValue(); - return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), - Mask, PassThru, Subtarget, DAG); + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, + Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); @@ -22343,9 +22431,9 @@ if (!isRoundModeCurDirection(Rnd)) return SDValue(); } - return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, - Src2), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode( + DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, + Subtarget, DAG); } assert(Op.getNumOperands() == (6U + HasRounding) && @@ -22359,9 +22447,9 @@ else if (!isRoundModeCurDirection(Sae)) return SDValue(); } - return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, - Src2, RoundingMode), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode( + DAG.getNode(Opc, dl, VT, Src1, Src2, RoundingMode), Mask, passThru, + Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RND: { SDValue Src1 = Op.getOperand(1); 
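// Editorial sketch (not part of the patch): getVectorMaskingNode /
// getScalarMaskingNode above wrap a masked AVX-512 intrinsic's result in
// "vselect(mask, result, passthru)". A scalar model of that per-lane select --
// standalone illustration with made-up names, not LLVM API:
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T, std::size_t N>
std::array<T, N> applyMask(uint32_t Mask, const std::array<T, N> &Result,
                           const std::array<T, N> &PassThru) {
  std::array<T, N> Out{};
  for (std::size_t I = 0; I != N; ++I) // one k-register bit per vector lane
    Out[I] = ((Mask >> I) & 1) ? Result[I] : PassThru[I];
  return Out;
}

int main() {
  std::array<int, 4> Res{1, 2, 3, 4}, Pass{9, 9, 9, 9};
  std::array<int, 4> Out = applyMask(/*Mask=*/0b0101u, Res, Pass);
  assert(Out[0] == 1 && Out[1] == 9 && Out[2] == 3 && Out[3] == 9);
  return 0;
}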
@@ -22396,8 +22484,8 @@ else return SDValue(); - return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask, + passThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: { SDValue Src1 = Op.getOperand(1); @@ -22433,8 +22521,8 @@ return SDValue(); } - return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), - Mask, PassThru, Subtarget, DAG); + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask, + PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_SCALAR_MASK_SAE: { SDValue Src1 = Op.getOperand(1); @@ -22483,12 +22571,12 @@ // Reverse the operands to match VSELECT order. return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); } - case VPERM_2OP : { + case VPERM_2OP: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); // Swap Src1 and Src2 in the node creation - return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); + return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1); } case IFMA_OP: // NOTE: We need to swizzle the operands to pass the multiply operands @@ -22500,13 +22588,13 @@ SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); - SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), - Subtarget, DAG); + SDValue FPclassMask = + getScalarMaskingNode(FPclass, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, - DAG.getConstant(0, dl, MVT::v8i1), - FPclassMask, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v8i1), FPclassMask, + DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } @@ -22525,9 +22613,9 @@ if (!isRoundModeCurDirection(Sae)) return SDValue(); } - //default rounding mode + // default rounding mode return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), CC); + Op.getOperand(2), CC); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); @@ -22543,17 +22631,17 @@ else if (!isRoundModeCurDirection(Sae)) return SDValue(); } - //default rounding mode + // default rounding mode if (!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); - SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), - Subtarget, DAG); + SDValue CmpMask = + getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, - DAG.getConstant(0, dl, MVT::v8i1), - CmpMask, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v8i1), CmpMask, + DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } case COMI: { // Comparison intrinsics @@ -22612,8 +22700,8 @@ // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. 
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, - DAG.getConstant(0, dl, MVT::v16i1), - FCmp, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v16i1), FCmp, + DAG.getIntPtrConstant(0, dl)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, DAG.getBitcast(MVT::i16, Ins)); } @@ -22666,9 +22754,9 @@ assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(2), - DAG.getConstant(0xf, dl, MVT::i32)); + SDValue RoundingMode = + DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(2), + DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } @@ -22676,9 +22764,9 @@ assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(3), - DAG.getConstant(0xf, dl, MVT::i32)); + SDValue RoundingMode = + DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(3), + DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } @@ -22700,7 +22788,7 @@ Op.getOperand(3), GenCF.getValue(1)); } SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); - SDValue Results[] = { SetCC, Res }; + SDValue Results[] = {SetCC, Res}; return DAG.getMergeValues(Results, dl); } case CVTPD2PS_MASK: @@ -22734,7 +22822,6 @@ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, PassThru, Mask); - } case CVTNEPS2BF16_MASK: { SDValue Src = Op.getOperand(1); @@ -22757,7 +22844,8 @@ } switch (IntNo) { - default: return SDValue(); // Don't custom lower most intrinsics. + default: + return SDValue(); // Don't custom lower most intrinsics. // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -22791,7 +22879,8 @@ unsigned TestOpc = X86ISD::PTEST; X86::CondCode X86CC; switch (IntNo) { - default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + default: + llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx512_ktestc_b: case Intrinsic::x86_avx512_ktestc_w: case Intrinsic::x86_avx512_ktestc_d: @@ -22862,7 +22951,8 @@ unsigned Opcode; X86::CondCode X86CC; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + default: + llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::x86_sse42_pcmpistria128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_A; @@ -22904,7 +22994,7 @@ X86CC = X86::COND_E; break; } - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); @@ -22919,7 +23009,7 @@ else Opcode = X86ISD::PCMPESTR; - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps); } @@ -22932,7 +23022,7 @@ else Opcode = X86ISD::PCMPESTR; - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); } @@ -23002,14 +23092,13 @@ SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); SDLoc DL(Op); - SDValue Operation = - DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, - Op->getOperand(1), Op->getOperand(2)); - - SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, - MaskVT, Operation); - SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, - MaskVT, Operation); + SDValue Operation = DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, + Op->getOperand(1), Op->getOperand(2)); + + SDValue Result0 = + DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, MaskVT, Operation); + SDValue Result1 = + DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, MaskVT, Operation); return DAG.getMergeValues({Result0, Result1}, DL); } } @@ -23035,15 +23124,15 @@ MemIntrinsicSDNode *MemIntr = cast(Op); - SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; + SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; SDValue Res = DAG.getTargetMemSDNode( - VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); - return DAG.getMergeValues({ Res, Res.getValue(2) }, dl); + VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); + return DAG.getMergeValues({Res, Res.getValue(2)}, dl); } -static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, - SDValue Src, SDValue Mask, SDValue Base, - SDValue Index, SDValue ScaleOp, SDValue Chain, +static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src, + SDValue Mask, SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -23070,16 +23159,16 @@ MemIntrinsicSDNode *MemIntr = cast(Op); - SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; + SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; SDValue Res = DAG.getTargetMemSDNode( - VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); - return DAG.getMergeValues({ Res, Res.getValue(2) }, dl); + VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); + return DAG.getMergeValues({Res, Res.getValue(2)}, dl); } static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, - SDValue Src, SDValue Mask, SDValue Base, - SDValue Index, SDValue ScaleOp, SDValue Chain, - const X86Subtarget &Subtarget) { + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain, + const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = dyn_cast(ScaleOp); // Scale must be constant. 
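// Illustrative sketch, not from this patch: user-level shape of the AVX2
// gathers that getAVX2GatherNode above handles. The scale operand has to be
// an immediate 1/2/4/8, which is why the lowering insists on a constant
// ScaleOp before building the node.
#include <immintrin.h>
static __m256i gather_i32_sketch(const int *Base, __m256i Index, __m256i Mask,
                                 __m256i Src) {
  // Loads Base[Index[i]] for lanes whose mask sign bit is set; all other
  // lanes keep the corresponding element of Src.
  return _mm256_mask_i32gather_epi32(Src, Base, Index, Mask, /*scale*/ 4);
}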
@@ -23116,8 +23205,8 @@ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - MVT MaskVT = - MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT( + MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); @@ -23133,11 +23222,11 @@ /// expanded intrinsics implicitly defines extra registers (i.e. not just /// EDX:EAX). static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, - SelectionDAG &DAG, - unsigned TargetOpcode, - unsigned SrcReg, - const X86Subtarget &Subtarget, - SmallVectorImpl &Results) { + SelectionDAG &DAG, + unsigned TargetOpcode, + unsigned SrcReg, + const X86Subtarget &Subtarget, + SmallVectorImpl &Results) { SDValue Chain = N->getOperand(0); SDValue Glue; @@ -23177,7 +23266,7 @@ } // Use a buildpair to merge the two 32-bit values into a 64-bit one. - SDValue Ops[] = { LO, HI }; + SDValue Ops[] = {LO, HI}; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); @@ -23194,9 +23283,9 @@ // The processor's time-stamp counter (a 64-bit MSR) is stored into the // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR // and the EAX register is loaded with the low-order 32 bits. - SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode, - /* NoRegister */0, Subtarget, - Results); + SDValue Glue = + expandIntrinsicWChainHelper(N, DL, DAG, Opcode, + /* NoRegister */ 0, Subtarget, Results); if (Opcode != X86::RDTSCP) return; @@ -23254,30 +23343,33 @@ } /// Emit Truncating Store with signed or unsigned saturation. -static SDValue -EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, - SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, - SelectionDAG &DAG) { +static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, + SDValue Val, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO, SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); - SDValue Ops[] = { Chain, Val, Ptr, Undef }; - return SignedSat ? - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO) : - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO); + SDValue Ops[] = {Chain, Val, Ptr, Undef}; + return SignedSat ? DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO) + : DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO); } /// Emit Masked Truncating Store with signed or unsigned saturation. -static SDValue -EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, - SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, SelectionDAG &DAG) { +static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, + const SDLoc &Dl, SDValue Val, SDValue Ptr, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; - return SignedSat ? - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO) : - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO); + SDValue Ops[] = {Chain, Val, Ptr, Mask}; + return SignedSat + ? 
DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO) + : DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, @@ -23302,9 +23394,8 @@ SDLoc dl(Op); // Create a WRPKRU node, pass the input to the EAX parameter, and pass 0 // to the EDX and ECX parameters. - return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(2), - DAG.getConstant(0, dl, MVT::i32), + return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(2), DAG.getConstant(0, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); } case llvm::Intrinsic::x86_flags_read_u32: @@ -23329,7 +23420,8 @@ unsigned Opcode; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); + default: + llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_umwait: Opcode = X86ISD::UMWAIT; break; @@ -23342,9 +23434,8 @@ break; } - SDValue Operation = - DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), - Op->getOperand(3), Op->getOperand(4)); + SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), + Op->getOperand(3), Op->getOperand(4)); SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); @@ -23356,7 +23447,8 @@ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); unsigned Opcode; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic!"); + default: + llvm_unreachable("Impossible intrinsic!"); case Intrinsic::x86_enqcmd: Opcode = X86ISD::ENQCMD; break; @@ -23375,8 +23467,9 @@ } SDLoc dl(Op); - switch(IntrData->Type) { - default: llvm_unreachable("Unknown Intrinsic Type"); + switch (IntrData->Type) { + default: + llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -23385,10 +23478,10 @@ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. - SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), - DAG.getConstant(1, dl, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, dl, MVT::i8), - SDValue(Result.getNode(), 1) }; + SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), + DAG.getConstant(1, dl, Op->getValueType(1)), + DAG.getConstant(X86::COND_B, dl, MVT::i8), + SDValue(Result.getNode(), 1)}; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. 
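// Illustrative sketch, not from this patch: source-level counterpart of the
// RDRAND/RDSEED lowering above, which pairs the random value with a validity
// bit derived from the carry flag (the CMOV on X86::COND_B).
#include <immintrin.h>
static int try_rdrand32(unsigned *Out) {
  // Returns 1 and stores a random value when the hardware RNG succeeded
  // (CF = 1); returns 0, with *Out left as 0, when no entropy was available.
  return _rdrand32_step(Out);
}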
@@ -23397,32 +23490,32 @@ } case GATHER_AVX2: { SDValue Chain = Op.getOperand(0); - SDValue Src = Op.getOperand(2); - SDValue Base = Op.getOperand(3); + SDValue Src = Op.getOperand(2); + SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Mask = Op.getOperand(5); + SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case GATHER: { - //gather(v1, mask, index, base, scale); + // gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); - SDValue Src = Op.getOperand(2); - SDValue Base = Op.getOperand(3); + SDValue Src = Op.getOperand(2); + SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Mask = Op.getOperand(5); + SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, - Chain, Subtarget); + return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, Chain, + Subtarget); } case SCATTER: { - //scatter(base, mask, index, v1, scale); + // scatter(base, mask, index, v1, scale); SDValue Chain = Op.getOperand(0); - SDValue Base = Op.getOperand(2); - SDValue Mask = Op.getOperand(3); + SDValue Base = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Src = Op.getOperand(5); + SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); @@ -23434,9 +23527,9 @@ "Wrong prefetch hint in intrinsic: should be 2 or 3"); unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); + SDValue Mask = Op.getOperand(2); SDValue Index = Op.getOperand(3); - SDValue Base = Op.getOperand(4); + SDValue Base = Op.getOperand(4); SDValue Scale = Op.getOperand(5); return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain, Subtarget); @@ -23468,8 +23561,8 @@ SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG); SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); - return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), - Ret, SDValue(InTrans.getNode(), 1)); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret, + SDValue(InTrans.getNode(), 1)); } case TRUNCATE_TO_MEM_VI8: case TRUNCATE_TO_MEM_VI16: @@ -23482,7 +23575,7 @@ MemIntrinsicSDNode *MemIntr = dyn_cast(Op); assert(MemIntr && "Expected MemIntrinsicSDNode!"); - EVT MemVT = MemIntr->getMemoryVT(); + EVT MemVT = MemIntr->getMemoryVT(); uint16_t TruncationOp = IntrData->Opc0; switch (TruncationOp) { @@ -23495,7 +23588,8 @@ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, - MemIntr->getMemOperand(), true /* truncating */); + MemIntr->getMemOperand(), + true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { @@ -23576,7 +23670,7 @@ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); - SDLoc dl(Op); // FIXME probably not meaningful + SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && @@ -23590,17 +23684,17 @@ // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, +unsigned X86TargetLowering::getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const MachineFunction &MF = DAG.getMachineFunction(); unsigned Reg = StringSwitch(RegName) - .Case("esp", X86::ESP) - .Case("rsp", X86::RSP) - .Case("ebp", X86::EBP) - .Case("rbp", X86::RBP) - .Default(0); + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Case("ebp", X86::EBP) + .Case("rbp", X86::RBP) + .Default(0); if (Reg == X86::EBP || Reg == X86::RBP) { if (!TFI.hasFP(MF)) @@ -23649,10 +23743,10 @@ } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - SDValue Offset = Op.getOperand(1); - SDValue Handler = Op.getOperand(2); - SDLoc dl (Op); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -23663,9 +23757,9 @@ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize(), - dl)); + SDValue StoreAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl)); StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo()); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); @@ -23688,19 +23782,20 @@ (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); } return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, - DAG.getVTList(MVT::i32, MVT::Other), - Op.getOperand(0), Op.getOperand(1)); + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); } SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); } -SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, - SelectionDAG &DAG) const { +SDValue +X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, Op.getOperand(0)); @@ -23716,7 +23811,7 @@ SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value - SDLoc dl (Op); + SDLoc dl(Op); const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -23725,7 +23820,7 @@ SDValue OutChains[6]; // Large code-model. - const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. + const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. 
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; @@ -23775,7 +23870,7 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } else { const Function *Func = - cast(cast(Op.getOperand(5))->getValue()); + cast(cast(Op.getOperand(5))->getValue()); CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; @@ -23797,7 +23892,8 @@ unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I, ++Idx) + E = FTy->param_end(); + I != E; ++I, ++Idx) if (Attrs.hasAttribute(Idx, Attribute::InReg)) { auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. @@ -23893,36 +23989,34 @@ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, 2, 2); - SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; - SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, - DAG.getVTList(MVT::Other), - Ops, MVT::i16, MMO); + SDValue Ops[] = {DAG.getEntryNode(), StackSlot}; + SDValue Chain = DAG.getMemIntrinsicNode( + X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); // Transform as necessary - SDValue CWD1 = - DAG.getNode(ISD::SRL, DL, MVT::i16, - DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x800, DL, MVT::i16)), - DAG.getConstant(11, DL, MVT::i8)); - SDValue CWD2 = - DAG.getNode(ISD::SRL, DL, MVT::i16, - DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x400, DL, MVT::i16)), - DAG.getConstant(9, DL, MVT::i8)); + SDValue CWD1 = DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, + DAG.getConstant(0x800, DL, MVT::i16)), + DAG.getConstant(11, DL, MVT::i8)); + SDValue CWD2 = DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, + DAG.getConstant(0x400, DL, MVT::i16)), + DAG.getConstant(9, DL, MVT::i8)); SDValue RetVal = - DAG.getNode(ISD::AND, DL, MVT::i16, - DAG.getNode(ISD::ADD, DL, MVT::i16, - DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), - DAG.getConstant(1, DL, MVT::i16)), - DAG.getConstant(3, DL, MVT::i16)); - - return DAG.getNode((VT.getSizeInBits() < 16 ? - ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); + DAG.getNode(ISD::AND, DL, MVT::i16, + DAG.getNode(ISD::ADD, DL, MVT::i16, + DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), + DAG.getConstant(1, DL, MVT::i16)), + DAG.getConstant(3, DL, MVT::i16)); + + return DAG.getNode( + (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, + RetVal); } // Split an unary integer op into 2 half sized ops. @@ -23976,17 +24070,15 @@ MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - assert((EltVT == MVT::i8 || EltVT == MVT::i16) && - "Unsupported element type"); + assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type"); // Split vector, it's Lo and Hi parts will be handled in next iteration. - if (NumElems > 16 || - (NumElems == 16 && !Subtarget.canExtendTo512DQ())) + if (NumElems > 16 || (NumElems == 16 && !Subtarget.canExtendTo512DQ())) return LowerVectorIntUnary(Op, DAG); MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && - "Unsupported value type for operation"); + "Unsupported value type for operation"); // Use native supported vector instruction vplzcntd. 
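// Illustrative sketch, not from this patch: scalar form of the FLT_ROUNDS
// lowering above (the FNSTCW16m / control-word code). It maps the x87
// rounding-control field (FPCW bits 10-11) onto the C FLT_ROUNDS encoding
// 0 = toward zero, 1 = to nearest, 2 = upward, 3 = downward.
static unsigned fltRoundsFromFPCW(unsigned CWD) {
  unsigned Hi = (CWD & 0x800) >> 11; // RC bit 11 -> bit 0
  unsigned Lo = (CWD & 0x400) >> 9;  // RC bit 10 -> bit 1
  // RC 00 -> 1 (nearest), 01 -> 3 (down), 10 -> 2 (up), 11 -> 0 (to zero).
  return ((Hi | Lo) + 1) & 3;
}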
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); @@ -24126,12 +24218,8 @@ if (Opc == ISD::CTLZ) { // If src is zero (i.e. bsr sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1)}; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); } @@ -24159,12 +24247,8 @@ Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); // If src is zero (i.e. bsf sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits, dl, VT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1)}; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } @@ -24190,7 +24274,7 @@ SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), @@ -24219,7 +24303,7 @@ SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), @@ -24233,8 +24317,8 @@ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); if (VT.getScalarType() == MVT::i1) - return DAG.getNode(ISD::XOR, SDLoc(Op), VT, - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode(ISD::XOR, SDLoc(Op), VT, Op.getOperand(0), + Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && @@ -24250,7 +24334,8 @@ if (VT.getScalarType() == MVT::i1) { SDLoc dl(Op); switch (Opcode) { - default: llvm_unreachable("Expected saturated arithmetic opcode"); + default: + llvm_unreachable("Expected saturated arithmetic opcode"); case ISD::UADDSAT: case ISD::SADDSAT: // *addsat i1 X, Y --> X | Y @@ -24265,8 +24350,8 @@ if (VT.is128BitVector()) { // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), VT); + EVT SetCCResultType = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDLoc DL(Op); if (Opcode == ISD::UADDSAT && !TLI.isOperationLegal(ISD::UMIN, VT)) { // uaddsat X, Y --> (X >u (X + Y)) ? -1 : X + Y @@ -24352,11 +24437,20 @@ // Else, expand to a compare/select. 
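// Illustrative sketch, not from this patch: scalar shape of the CTLZ
// lowering above for a 32-bit operand. BSR leaves its destination undefined
// and sets ZF for a zero input, so the CMOV substitutes 2*NumBits-1 (63);
// the lowering later XORs with NumBits-1 (31), turning the bit index into a
// leading-zero count (and 63 ^ 31 == 32 covers the zero input).
static unsigned ctlz32ViaBSR(unsigned X) {
  unsigned Idx = 63; // value the CMOV selects when ZF is set
  if (X != 0)
    for (Idx = 31; (X >> Idx) == 0; --Idx) { // BSR: index of highest set bit
    }
  return Idx ^ 31; // 31 - Idx for nonzero X, 32 for X == 0
}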
ISD::CondCode CC; switch (Opcode) { - case ISD::SMIN: CC = ISD::CondCode::SETLT; break; - case ISD::SMAX: CC = ISD::CondCode::SETGT; break; - case ISD::UMIN: CC = ISD::CondCode::SETULT; break; - case ISD::UMAX: CC = ISD::CondCode::SETUGT; break; - default: llvm_unreachable("Unknown MINMAX opcode"); + case ISD::SMIN: + CC = ISD::CondCode::SETLT; + break; + case ISD::SMAX: + CC = ISD::CondCode::SETGT; + break; + case ISD::UMIN: + CC = ISD::CondCode::SETULT; + break; + case ISD::UMAX: + CC = ISD::CondCode::SETUGT; + break; + default: + llvm_unreachable("Unknown MINMAX opcode"); } SDValue Cond = DAG.getSetCC(DL, VT, N0, N1, CC); @@ -24409,10 +24503,10 @@ SmallVector LoOps, HiOps; for (unsigned i = 0; i != NumElts; i += 16) { for (unsigned j = 0; j != 8; ++j) { - LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, - MVT::i16)); - HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, - MVT::i16)); + LoOps.push_back( + DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, MVT::i16)); + HiOps.push_back( + DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, MVT::i16)); } } @@ -24437,7 +24531,7 @@ "Should not custom lower when pmulld is available!"); // Extract the odd parts. - static const int UnpackMask[] = { 1, -1, 3, -1 }; + static const int UnpackMask[] = {1, -1, 3, -1}; SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); @@ -24455,7 +24549,7 @@ // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. - static const int ShufMask[] = { 0, 4, 2, 6 }; + static const int ShufMask[] = {0, 4, 2, 6}; return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); } @@ -24538,14 +24632,14 @@ // // Place the odd value at an even position (basically, shift all values 1 // step to the left): - const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, + const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1}; // => - SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A, - makeArrayRef(&Mask[0], NumElts)); + SDValue Odd0 = + DAG.getVectorShuffle(VT, dl, A, A, makeArrayRef(&Mask[0], NumElts)); // => - SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B, - makeArrayRef(&Mask[0], NumElts)); + SDValue Odd1 = + DAG.getVectorShuffle(VT, dl, B, B, makeArrayRef(&Mask[0], NumElts)); // Emit two multiplies, one for the lower 2 ints and one for the higher 2 // ints. @@ -24588,7 +24682,7 @@ // Only i8 vectors should need custom lowering after this. assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || - (VT == MVT::v64i8 && Subtarget.hasBWI())) && + (VT == MVT::v64i8 && Subtarget.hasBWI())) && "Unsupported vector type"); // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply, @@ -24634,10 +24728,9 @@ Lo = DAG.getBitcast(VT, Lo); Hi = DAG.getBitcast(VT, Hi); return DAG.getVectorShuffle(VT, dl, Lo, Hi, - { 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 50, 52, 54, 56, 58, 60, 62}); + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 50, 52, 54, 56, 58, 60, 62}); } // For signed v16i8 and all unsigned vXi8 we will unpack the low and high @@ -24646,7 +24739,7 @@ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15, + static const int PSHUFDMask[] = {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}; // Extract the lo parts and zero/sign extend to i16. 
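// Illustrative sketch, not from this patch: intrinsics-level equivalent of
// the v4i32 multiply lowering above for targets without PMULLD. PMULUDQ
// multiplies the even 32-bit lanes into 64-bit products, so two PMULUDQs
// plus shuffles recover all four low 32-bit results (the DAG sequence in the
// patch uses a different but equivalent shuffle arrangement).
#include <emmintrin.h>
static __m128i mullo_epi32_sse2(__m128i A, __m128i B) {
  __m128i Even = _mm_mul_epu32(A, B);                      // lanes 0 and 2
  __m128i Odd = _mm_mul_epu32(_mm_srli_epi64(A, 32),       // lanes 1 and 3
                              _mm_srli_epi64(B, 32));
  Even = _mm_shuffle_epi32(Even, _MM_SHUFFLE(0, 0, 2, 0)); // keep low halves
  Odd = _mm_shuffle_epi32(Odd, _MM_SHUFFLE(0, 0, 2, 0));
  return _mm_unpacklo_epi32(Even, Odd);                    // lanes 0,1,2,3
}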
@@ -24669,10 +24762,10 @@ ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG); AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG); } else { - ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, - DAG.getConstant(0, dl, VT))); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, - DAG.getConstant(0, dl, VT))); + ALo = DAG.getBitcast( + ExVT, getUnpackl(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); + AHi = DAG.getBitcast( + ExVT, getUnpackh(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); } SDValue BLo, BHi; @@ -24711,10 +24804,10 @@ BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG); BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG); } else { - BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, - DAG.getConstant(0, dl, VT))); - BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, - DAG.getConstant(0, dl, VT))); + BLo = DAG.getBitcast( + ExVT, getUnpackl(DAG, dl, VT, B, DAG.getConstant(0, dl, VT))); + BHi = DAG.getBitcast( + ExVT, getUnpackh(DAG, dl, VT, B, DAG.getConstant(0, dl, VT))); } // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and @@ -24728,7 +24821,8 @@ return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } -SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget.isTargetWin64() && "Unexpected target"); EVT VT = Op.getValueType(); assert(VT.isInteger() && VT.getSizeInBits() == 128 && @@ -24737,13 +24831,32 @@ RTLIB::Libcall LC; bool isSigned; switch (Op->getOpcode()) { - default: llvm_unreachable("Unexpected request for libcall!"); - case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; - case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; - case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; - case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; - case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break; - case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break; + default: + llvm_unreachable("Unexpected request for libcall!"); + case ISD::SDIV: + isSigned = true; + LC = RTLIB::SDIV_I128; + break; + case ISD::UDIV: + isSigned = false; + LC = RTLIB::UDIV_I128; + break; + case ISD::SREM: + isSigned = true; + LC = RTLIB::SREM_I128; + break; + case ISD::UREM: + isSigned = false; + LC = RTLIB::UREM_I128; + break; + case ISD::SDIVREM: + isSigned = true; + LC = RTLIB::SDIVREM_I128; + break; + case ISD::UDIVREM: + isSigned = false; + LC = RTLIB::UDIVREM_I128; + break; } SDLoc dl(Op); @@ -24760,7 +24873,7 @@ InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MachinePointerInfo(), /* Alignment = */ 16); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Ty = PointerType::get(ArgTy,0); + Entry.Ty = PointerType::get(ArgTy, 0); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); @@ -24805,9 +24918,9 @@ // The shift amount is a variable, but it is the same for all vector lanes. // These instructions are defined together with shift-immediate. 
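// Illustrative sketch, not from this patch: the kind of source construct
// LowerWin64_i128OP above exists for. Win64 has no legal 128-bit division,
// so the operation becomes a libcall (e.g. RTLIB::SDIV_I128) with both i128
// operands spilled to 16-byte-aligned stack slots and passed by pointer, as
// the surrounding code shows.
static __int128 sdiv128(__int128 A, __int128 B) {
  return A / B; // routed through LowerWin64_i128OP when targeting Win64
}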
-static -bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, - unsigned Opcode) { +static bool SupportedVectorShiftWithBaseAmnt(MVT VT, + const X86Subtarget &Subtarget, + unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } @@ -24827,7 +24940,7 @@ return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); - bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; + bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; return (Opcode == ISD::SRA) ? AShift : LShift; } @@ -25094,8 +25207,8 @@ return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo), - DAG.getBitcast(VT, Hi), - {0, 2, 4, 6, 8, 10, 12, 14}); + DAG.getBitcast(VT, Hi), + {0, 2, 4, 6, 8, 10, 12, 14}); } return SDValue(); @@ -25874,7 +25987,7 @@ // If this is a canonical idempotent atomicrmw w/no uses, we have a better // lowering available in lowerAtomicArith. - // TODO: push more cases through this path. + // TODO: push more cases through this path. if (auto *C = dyn_cast(AI->getValOperand())) if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && AI->use_empty()) @@ -25885,7 +25998,8 @@ auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. - auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); + auto Order = + AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); // Before the load we need a fence. Here is an example lifted from // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence @@ -25932,31 +26046,31 @@ /// Emit a locked operation on a stack location which does not change any /// memory location, but does involve a lock prefix. Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, -/// and b) definitely dereferenceable. Returns the new Chain result. +/// and b) definitely dereferenceable. Returns the new Chain result. static SDValue emitLockedStackOp(SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue Chain, SDLoc DL) { + const X86Subtarget &Subtarget, SDValue Chain, + SDLoc DL) { // Implementation notes: // 1) LOCK prefix creates a full read/write reordering barrier for memory // operations issued by the current processor. As such, the location // referenced is not relevant for the ordering properties of the instruction. // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual, - // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions + // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 2) Using an immediate operand appears to be the best encoding choice // here since it doesn't require an extra register. // 3) OR appears to be very slightly faster than ADD. (Though, the difference // is small enough it might just be measurement noise.) // 4) When choosing offsets, there are several contributing factors: // a) If there's no redzone, we default to TOS. (We could allocate a cache - // line aligned stack object to improve this case.) + // line aligned stack object to improve this case.) // b) To minimize our chances of introducing a false dependence, we prefer - // to offset the stack usage from TOS slightly. + // to offset the stack usage from TOS slightly. 
// c) To minimize concerns about cross thread stack usage - in particular, // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // captures state in the TOS frame and accesses it from many threads - // we want to use an offset such that the offset is in a distinct cache // line from the TOS frame. - // + // // For a general discussion of the tradeoffs and benchmark results, see: // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ @@ -25966,31 +26080,28 @@ if (Subtarget.is64Bit()) { SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::RSP, MVT::i64), // Base - DAG.getTargetConstant(1, DL, MVT::i8), // Scale - DAG.getRegister(0, MVT::i64), // Index - DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp - DAG.getRegister(0, MVT::i16), // Segment. - Zero, - Chain}; - SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, - MVT::Other, Ops); + SDValue Ops[] = {DAG.getRegister(X86::RSP, MVT::i64), // Base + DAG.getTargetConstant(1, DL, MVT::i8), // Scale + DAG.getRegister(0, MVT::i64), // Index + DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp + DAG.getRegister(0, MVT::i16), // Segment. + Zero, + Chain}; + SDNode *Res = + DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops); return SDValue(Res, 1); } SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, DL, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp - DAG.getRegister(0, MVT::i16), // Segment. - Zero, - Chain - }; - SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, - MVT::Other, Ops); + SDValue Ops[] = {DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, DL, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp + DAG.getRegister(0, MVT::i16), // Segment. + Zero, + Chain}; + SDNode *Res = + DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops); return SDValue(Res, 1); } @@ -25998,9 +26109,9 @@ SelectionDAG &DAG) { SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( - cast(Op.getOperand(1))->getZExtValue()); + cast(Op.getOperand(1))->getZExtValue()); SyncScope::ID FenceSSID = static_cast( - cast(Op.getOperand(2))->getZExtValue()); + cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
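// Illustrative sketch, not from this patch: what emitLockedStackOp above
// provides at the source level. A sequentially consistent fence can be
// implemented either as MFENCE or as a LOCKed no-op OR of a stack location
// at (or slightly below) the stack pointer; both order all earlier loads and
// stores, and the locked stack op is typically cheaper.
#include <atomic>
static void full_fence() {
  // With this lowering in place, this may compile to the locked stack-op
  // idiom rather than MFENCE.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}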
@@ -26009,7 +26120,7 @@ if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); return emitLockedStackOp(DAG, Subtarget, Chain, dl); } @@ -26023,36 +26134,44 @@ SDLoc DL(Op); unsigned Reg = 0; unsigned size = 0; - switch(T.SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: Reg = X86::AL; size = 1; break; - case MVT::i16: Reg = X86::AX; size = 2; break; - case MVT::i32: Reg = X86::EAX; size = 4; break; + switch (T.SimpleTy) { + default: + llvm_unreachable("Invalid value type!"); + case MVT::i8: + Reg = X86::AL; + size = 1; + break; + case MVT::i16: + Reg = X86::AX; + size = 2; + break; + case MVT::i32: + Reg = X86::EAX; + size = 4; + break; case MVT::i64: assert(Subtarget.is64Bit() && "Node not type legal!"); - Reg = X86::RAX; size = 8; + Reg = X86::RAX; + size = 8; break; } - SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, - Op.getOperand(2), SDValue()); - SDValue Ops[] = { cpIn.getValue(0), - Op.getOperand(1), - Op.getOperand(3), - DAG.getTargetConstant(size, DL, MVT::i8), - cpIn.getValue(1) }; + SDValue cpIn = + DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue()); + SDValue Ops[] = {cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), + DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1)}; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(Op)->getMemOperand(); - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, T, MMO); + SDValue Result = + DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO); SDValue cpOut = - DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); + DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, MVT::i32, cpOut.getValue(2)); SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG); - return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - cpOut, Success, EFLAGS.getValue(1)); + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), cpOut, Success, + EFLAGS.getValue(1)); } // Create MOVMSKB, taking into account whether we need to split for AVX1. @@ -26096,7 +26215,7 @@ // Custom splitting for BWI types when AVX512F is available but BWI isn't. if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) { + DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) { SDLoc dl(Op); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); @@ -26131,8 +26250,8 @@ // Example: from MVT::v2i32 to MVT::v4i32. MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), SrcVT.getVectorNumElements() * 2); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, - DAG.getUNDEF(SrcVT)); + Src = + DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, DAG.getUNDEF(SrcVT)); } else { assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && "Unexpected source type in LowerBITCAST"); @@ -26149,15 +26268,15 @@ DAG.getIntPtrConstant(0, dl)); } - assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && - Subtarget.hasMMX() && "Unexpected custom BITCAST"); + assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && Subtarget.hasMMX() && + "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || - (DstVT.isVector() && DstVT.getSizeInBits()==64)) && + (DstVT.isVector() && DstVT.getSizeInBits() == 64)) && "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. 
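// Illustrative sketch, not from this patch: source-level counterpart of
// LowerCMP_SWAP above. The expected value is pinned to AL/AX/EAX/RAX for the
// LCMPXCHG node, and the success flag is recovered from EFLAGS via a SETcc
// on X86::COND_E.
#include <atomic>
static bool cas32(std::atomic<int> &Obj, int &Expected, int Desired) {
  // Compiles down to CMPXCHG with Expected in EAX and SETE for the result.
  return Obj.compare_exchange_strong(Expected, Desired);
}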
- if (SrcVT==MVT::i64 && DstVT.isVector()) + if (SrcVT == MVT::i64 && DstVT.isVector()) return Op; - if (DstVT==MVT::i64 && SrcVT.isVector()) + if (DstVT == MVT::i64 && SrcVT.isVector()) return Op; // MMX <=> MMX conversions are Legal. if (SrcVT.isVector() && DstVT.isVector()) @@ -26295,7 +26414,8 @@ if (Subtarget.hasVPOPCNTDQ()) { unsigned NumElems = VT.getVectorNumElements(); assert((VT.getVectorElementType() == MVT::i8 || - VT.getVectorElementType() == MVT::i16) && "Unexpected type"); + VT.getVectorElementType() == MVT::i16) && + "Unexpected type"); if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) { MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0); @@ -26480,8 +26600,8 @@ // select LXADD if LOCK_SUB can't be selected. if (Opc == ISD::ATOMIC_LOAD_SUB) { RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS); - return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, - RHS, AN->getMemOperand()); + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS, + AN->getMemOperand()); } assert(Opc == ISD::ATOMIC_LOAD_ADD && "Used AtomicRMW ops other than Add should have been expanded!"); @@ -26498,12 +26618,12 @@ // seq_cst which isn't SingleThread, everything just needs to be preserved // during codegen and then dropped. Note that we expect (but don't assume), // that orderings other than seq_cst and acq_rel have been canonicalized to - // a store or load. + // a store or load. if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && AN->getSyncScopeID() == SyncScope::System) { // Prefer a locked operation against a stack location to minimize cache // traffic. This assumes that stack locations are very likely to be - // accessed only by the owning thread. + // accessed only by the owning thread. SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. @@ -26514,16 +26634,16 @@ SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), - DAG.getUNDEF(VT), NewChain); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), + NewChain); } SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget); // RAUW the chain, but don't worry about the result, as it's unused. assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), - DAG.getUNDEF(VT), LockOp.getValue(1)); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), + LockOp.getValue(1)); } static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, @@ -26552,10 +26672,10 @@ SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getOperand(2)); SDVTList Tys = DAG.getVTList(MVT::Other); - SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() }; - SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, - Ops, MVT::i64, - Node->getMemOperand()); + SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()}; + SDValue Chain = + DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, + MVT::i64, Node->getMemOperand()); // If this is a sequentially consistent store, also emit an appropriate // barrier. 
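// Illustrative sketch, not from this patch: the source pattern behind the
// "idempotent atomicrmw with no uses" fast path handled above. A dead
// "or 0" RMW only matters for its ordering, so at seq_cst/system scope it can
// be served by the locked stack op (or by a compiler-only MEMBARRIER in the
// weaker cases) instead of a real LOCK OR on V's cache line.
#include <atomic>
static void ordering_only_rmw(std::atomic<int> &V) {
  (void)V.fetch_or(0, std::memory_order_seq_cst); // result intentionally unused
}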
@@ -26569,11 +26689,9 @@ // Convert seq_cst store -> xchg // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - Node->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Node->getOperand(2), - Node->getMemOperand()); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(), + Node->getOperand(0), Node->getOperand(1), + Node->getOperand(2), Node->getMemOperand()); return Swap.getValue(1); } @@ -26592,12 +26710,12 @@ SDValue Carry = Op.getOperand(2); EVT CarryVT = Carry.getValueType(); APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), - Carry, DAG.getConstant(NegOne, DL, CarryVT)); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry, + DAG.getConstant(NegOne, DL, CarryVT)); unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB; - SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), - Op.getOperand(1), Carry.getValue(1)); + SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), Op.getOperand(1), + Carry.getValue(1)); SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG); if (N->getValueType(1) == MVT::i1) @@ -26681,8 +26799,7 @@ "Unexpected request for vector widening"); SDLoc dl(InOp); - if (InOp.getOpcode() == ISD::CONCAT_VECTORS && - InOp.getNumOperands() == 2) { + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) { SDValue N1 = InOp.getOperand(1); if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || N1.isUndef()) { @@ -26699,16 +26816,16 @@ EVT EltVT = InOp.getOperand(0).getValueType(); - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : - DAG.getUNDEF(EltVT); + SDValue FillVal = + FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) Ops.push_back(FillVal); return DAG.getBuildVector(NVT, dl, Ops); } - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : - DAG.getUNDEF(NVT); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, - InOp, DAG.getIntPtrConstant(0, dl)); + SDValue FillVal = + FillWithZeroes ? DAG.getConstant(0, dl, NVT) : DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, InOp, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, @@ -26780,8 +26897,8 @@ if (!Subtarget.hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // Determine how much we need to widen by to get a 512-bit type. 
- unsigned Factor = std::min(512/VT.getSizeInBits(), - 512/IndexVT.getSizeInBits()); + unsigned Factor = + std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits()); unsigned NumElts = VT.getVectorNumElements() * Factor; VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); @@ -26818,10 +26935,10 @@ assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); - assert((ScalarVT.getSizeInBits() >= 32 || - (Subtarget.hasBWI() && - (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && - "Unsupported masked load op."); + assert( + (ScalarVT.getSizeInBits() >= 32 || + (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && + "Unsupported masked load op."); // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit @@ -26836,15 +26953,14 @@ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), - N->getBasePtr(), Mask, PassThru, - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); - - SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - NewLoad.getValue(0), - DAG.getIntPtrConstant(0, dl)); + SDValue NewLoad = + DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), + N->getExtensionType(), N->isExpandingLoad()); + + SDValue Exract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), + DAG.getIntPtrConstant(0, dl)); SDValue RetOps[] = {Exract, NewLoad.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } @@ -26867,14 +26983,14 @@ assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); - assert((ScalarVT.getSizeInBits() >= 32 || - (Subtarget.hasBWI() && - (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && - "Unsupported masked store op."); + assert( + (ScalarVT.getSizeInBits() >= 32 || + (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && + "Unsupported masked store op."); // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit - unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. @@ -26916,8 +27032,8 @@ if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && !IndexVT.is512BitVector()) { // Determine how much we need to widen by to get a 512-bit type. 
- unsigned Factor = std::min(512/VT.getSizeInBits(), - 512/IndexVT.getSizeInBits()); + unsigned Factor = + std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits()); unsigned NumElts = VT.getVectorNumElements() * Factor; @@ -26930,13 +27046,13 @@ Mask = ExtendToType(Mask, MaskVT, DAG, true); } - SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, - N->getScale() }; + SDValue Ops[] = {N->getChain(), PassThru, Mask, + N->getBasePtr(), Index, N->getScale()}; SDValue NewGather = DAG.getTargetMemSDNode( DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, - NewGather, DAG.getIntPtrConstant(0, dl)); + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, NewGather, + DAG.getIntPtrConstant(0, dl)); return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl); } @@ -26985,123 +27101,199 @@ /// Provide custom lowering hooks for some operations. SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: llvm_unreachable("Should not custom lower this!"); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); + default: + llvm_unreachable("Should not custom lower this!"); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return LowerCMP_SWAP(Op, Subtarget, DAG); - case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG); + case ISD::CTPOP: + return LowerCTPOP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); - case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); - case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); - case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); - case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG); - case ISD::VSELECT: return LowerVSELECT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); - case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); - case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ATOMIC_LOAD_AND: + return lowerAtomicArith(Op, DAG, Subtarget); + case ISD::ATOMIC_STORE: + return LowerATOMIC_STORE(Op, DAG, Subtarget); + case ISD::BITREVERSE: + return LowerBITREVERSE(Op, Subtarget, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, Subtarget, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVectorShuffle(Op, Subtarget, DAG); + case ISD::VSELECT: + return LowerVSELECT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_SUBVECTOR: + return 
LowerINSERT_SUBVECTOR(Op, Subtarget, DAG); + case ISD::EXTRACT_SUBVECTOR: + return LowerEXTRACT_SUBVECTOR(Op, Subtarget, DAG); + case ISD::SCALAR_TO_VECTOR: + return LowerSCALAR_TO_VECTOR(Op, Subtarget, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); case ISD::SHL_PARTS: case ISD::SRA_PARTS: - case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); + case ISD::SRL_PARTS: + return LowerShiftParts(Op, DAG); case ISD::FSHL: - case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); - case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); - case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG); - case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG); + case ISD::FSHR: + return LowerFunnelShift(Op, Subtarget, DAG); + case ISD::SINT_TO_FP: + return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + return LowerUINT_TO_FP(Op, DAG); + case ISD::TRUNCATE: + return LowerTRUNCATE(Op, DAG); + case ISD::ZERO_EXTEND: + return LowerZERO_EXTEND(Op, Subtarget, DAG); + case ISD::SIGN_EXTEND: + return LowerSIGN_EXTEND(Op, Subtarget, DAG); + case ISD::ANY_EXTEND: + return LowerANY_EXTEND(Op, Subtarget, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG); case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); - case ISD::STORE: return LowerStore(Op, Subtarget, DAG); + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG); + case ISD::FP_EXTEND: + return LowerFP_EXTEND(Op, DAG); + case ISD::LOAD: + return LowerLoad(Op, Subtarget, DAG); + case ISD::STORE: + return LowerStore(Op, Subtarget, DAG); case ISD::FADD: - case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget); + case ISD::FSUB: + return lowerFaddFsub(Op, DAG, Subtarget); case ISD::FABS: - case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); - case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::FNEG: + return LowerFABSorFNEG(Op, DAG); + case ISD::FCOPYSIGN: + return LowerFCOPYSIGN(Op, DAG); + case ISD::FGETSIGN: + return LowerFGETSIGN(Op, DAG); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::SETCCCARRY: + return LowerSETCCCARRY(Op, DAG); + case ISD::SELECT: + return LowerSELECT(Op, DAG); + case ISD::BRCOND: + return LowerBRCOND(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case 
ISD::VACOPY: + return LowerVACOPY(Op, Subtarget, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG); + case ISD::ADDROFRETURNADDR: + return LowerADDROFRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: - return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); - case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_RETURN: + return LowerEH_RETURN(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); - case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); - case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); - case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::INIT_TRAMPOLINE: + return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: + return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::FLT_ROUNDS_: + return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG); + case ISD::CTLZ_ZERO_UNDEF: + return LowerCTLZ(Op, Subtarget, DAG); case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG); - case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); + case ISD::CTTZ_ZERO_UNDEF: + return LowerCTTZ(Op, Subtarget, DAG); + case ISD::MUL: + return LowerMUL(Op, Subtarget, DAG); case ISD::MULHS: - case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG); + case ISD::MULHU: + return LowerMULH(Op, Subtarget, DAG); case ISD::ROTL: - case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG); + case ISD::ROTR: + return LowerRotate(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: - case ISD::SHL: return LowerShift(Op, Subtarget, DAG); + case ISD::SHL: + return LowerShift(Op, Subtarget, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: case ISD::USUBO: case ISD::SMULO: - case ISD::UMULO: return LowerXALUO(Op, DAG); - case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); - case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); + case ISD::UMULO: + return LowerXALUO(Op, DAG); + case ISD::READCYCLECOUNTER: + return LowerREADCYCLECOUNTER(Op, Subtarget, DAG); + case ISD::BITCAST: + return LowerBITCAST(Op, Subtarget, DAG); case ISD::ADDCARRY: - case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); + case ISD::SUBCARRY: + return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: - case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget); + case ISD::SUB: + return lowerAddSub(Op, DAG, Subtarget); case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: - case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget); + case ISD::SSUBSAT: + return LowerADDSAT_SUBSAT(Op, 
DAG, Subtarget); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: - case ISD::UMIN: return LowerMINMAX(Op, DAG); - case ISD::ABS: return LowerABS(Op, Subtarget, DAG); - case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); - case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); - case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); - case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); - case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); + case ISD::UMIN: + return LowerMINMAX(Op, DAG); + case ISD::ABS: + return LowerABS(Op, Subtarget, DAG); + case ISD::FSINCOS: + return LowerFSINCOS(Op, Subtarget, DAG); + case ISD::MLOAD: + return LowerMLOAD(Op, Subtarget, DAG); + case ISD::MSTORE: + return LowerMSTORE(Op, Subtarget, DAG); + case ISD::MGATHER: + return LowerMGATHER(Op, Subtarget, DAG); + case ISD::MSCATTER: + return LowerMSCATTER(Op, Subtarget, DAG); case ISD::GC_TRANSITION_START: - return LowerGC_TRANSITION_START(Op, DAG); - case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG); + return LowerGC_TRANSITION_START(Op, DAG); + case ISD::GC_TRANSITION_END: + return LowerGC_TRANSITION_END(Op, DAG); } } @@ -27127,7 +27319,7 @@ // If the original node has multiple results, then the return node should // have the same number of results. assert((N->getNumValues() == Res->getNumValues()) && - "Lowering returned the wrong number of results!"); + "Lowering returned the wrong number of results!"); // Places new result values base on N result number. for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) @@ -27137,7 +27329,7 @@ /// Replace a node with an illegal result type with a new node built out of /// custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl&Results, + SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { @@ -27169,10 +27361,10 @@ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger && VT.getVectorNumElements() == 2) { // Promote to a pattern that will be turned into PMULUDQ. 
- SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(0)); - SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(1)); + SDValue N0 = + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, N->getOperand(0)); + SDValue N1 = + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, N->getOperand(1)); SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul)); } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && @@ -27208,11 +27400,10 @@ "Expected a VT that divides into 128 bits."); unsigned NumConcat = 128 / InVT.getSizeInBits(); - EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - NumConcat * InVT.getVectorNumElements()); - EVT WideVT = EVT::getVectorVT(*DAG.getContext(), - VT.getVectorElementType(), + EVT InWideVT = + EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + NumConcat * InVT.getVectorNumElements()); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumConcat * VT.getVectorNumElements()); SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); @@ -27266,8 +27457,8 @@ N->getOperand(0), UNDEF); SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(1), UNDEF); - SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, - N->getOperand(2)); + SDValue Res = + DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, N->getOperand(2)); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); @@ -27326,7 +27517,7 @@ } case ISD::SDIVREM: case ISD::UDIVREM: { - SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); + SDValue V = LowerWin64_i128OP(SDValue(N, 0), DAG); Results.push_back(V); return; } @@ -27354,7 +27545,7 @@ // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = VT.getVectorNumElements(); - for (unsigned i=0; i < MinElts; ++i) { + for (unsigned i = 0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In, DAG.getIntPtrConstant(i, dl)); Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val); @@ -27394,13 +27585,12 @@ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting // we allow the sra from the extend to i32 to be shared by the split. - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - InVT.getVectorNumElements() / 2); - MVT ExtendVT = MVT::getVectorVT(MVT::i32, - VT.getVectorNumElements()); - In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, - In, DAG.getIntPtrConstant(0, dl)); + EVT ExtractVT = + EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + InVT.getVectorNumElements() / 2); + MVT ExtendVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements()); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, In, + DAG.getIntPtrConstant(0, dl)); In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In); // Fill a vector with sign bits for each element. @@ -27444,11 +27634,11 @@ // Create an unpackl and unpackh to interleave the sign bits then bitcast // to v2i64. 
- SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, - {0, 4, 1, 5}); + SDValue Lo = + DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {0, 4, 1, 5}); Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); - SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, - {2, 6, 3, 7}); + SDValue Hi = + DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {2, 6, 3, 7}); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); @@ -27512,10 +27702,9 @@ return; SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src); - Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext - : ISD::AssertSext, - dl, MVT::v2i32, Res, - DAG.getValueType(VT.getVectorElementType())); + Res = DAG.getNode( + N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext : ISD::AssertSext, + dl, MVT::v2i32, Res, DAG.getValueType(VT.getVectorElementType())); Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); Results.push_back(Res); return; @@ -27553,13 +27742,12 @@ return; } - if (VT == MVT::v2i32) { assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); bool Widenv2i32 = - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; + getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; if (Src.getValueType() == MVT::v2f64) { unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (!IsSigned && !Subtarget.hasVLX()) { @@ -27569,8 +27757,8 @@ // Custom widen by doubling to a legal vector with. Isel will // further widen to v8f64. Opc = ISD::FP_TO_UINT; - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, - Src, DAG.getUNDEF(MVT::v2f64)); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src, + DAG.getUNDEF(MVT::v2f64)); } SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); if (!Widenv2i32) @@ -27584,8 +27772,8 @@ SDValue Idx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); - Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT - : ISD::FP_TO_UINT, dl, MVT::v4i32, Res); + Res = DAG.getNode(IsSigned ? 
ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, + MVT::v4i32, Res); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; @@ -27607,9 +27795,9 @@ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts); SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, - DAG.getConstantFP(0.0, dl, VecInVT), Src, - ZeroIdx); + SDValue Res = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, + DAG.getConstantFP(0.0, dl, VecInVT), Src, ZeroIdx); Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); Results.push_back(Res); @@ -27654,7 +27842,7 @@ } case ISD::FP_ROUND: { if (!isTypeLegal(N->getOperand(0).getValueType())) - return; + return; SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); return; @@ -27669,8 +27857,9 @@ case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default : llvm_unreachable("Do not know how to custom type " - "legalize this intrinsic operation!"); + default: + llvm_unreachable("Do not know how to custom type " + "legalize this intrinsic operation!"); case Intrinsic::x86_rdtsc: return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results); @@ -27703,11 +27892,10 @@ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), DAG.getConstant(1, dl, HalfT)); cpInL = DAG.getCopyToReg(N->getOperand(0), dl, - Regs64bit ? X86::RAX : X86::EAX, - cpInL, SDValue()); - cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, - Regs64bit ? X86::RDX : X86::EDX, - cpInH, cpInL.getValue(1)); + Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue()); + cpInH = + DAG.getCopyToReg(cpInL.getValue(0), dl, Regs64bit ? X86::RDX : X86::EDX, + cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), DAG.getConstant(0, dl, HalfT)); @@ -27757,12 +27945,12 @@ Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); } SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, - Regs64bit ? X86::RAX : X86::EAX, - HalfT, Result.getValue(1)); + Regs64bit ? X86::RAX : X86::EAX, HalfT, + Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, - Regs64bit ? X86::RDX : X86::EDX, - HalfT, cpOutL.getValue(2)); - SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; + Regs64bit ? X86::RDX : X86::EDX, HalfT, + cpOutL.getValue(2)); + SDValue OpsF[] = {cpOutL.getValue(0), cpOutH.getValue(0)}; SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, MVT::i32, cpOutH.getValue(2)); @@ -27785,7 +27973,7 @@ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the // lower 64-bits. SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); - SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; + SDValue Ops[] = {Node->getChain(), Node->getBasePtr()}; SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, @@ -27799,10 +27987,9 @@ // integer into the significand. // FIXME: Do we need to glue? See FIXME comment in BuildFILD. 
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); - SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, - dl, Tys, Ops, MVT::i64, - Node->getMemOperand()); + SDValue Ops[] = {Node->getChain(), Node->getBasePtr()}; + SDValue Result = DAG.getMemIntrinsicNode( + X86ISD::FILD_FLAG, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Chain = Result.getValue(1); SDValue InFlag = Result.getValue(2); @@ -27814,11 +28001,10 @@ int SPFI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; - Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, - DAG.getVTList(MVT::Other), StoreOps, - MVT::i64, MPI, 0 /*Align*/, - MachineMemOperand::MOStore); + SDValue StoreOps[] = {Chain, Result, StackPtr, InFlag}; + Chain = DAG.getMemIntrinsicNode( + X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, + MPI, 0 /*Align*/, MachineMemOperand::MOStore); // Finally load the value back from the stack temporary and return it. // This load is not atomic and doesn't need to be. @@ -27868,8 +28054,8 @@ } // Custom splitting for BWI types when AVX512F is available but BWI isn't. - if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && - SrcVT.isVector() && isTypeLegal(SrcVT)) { + if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && SrcVT.isVector() && + isTypeLegal(SrcVT)) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8; @@ -27905,9 +28091,9 @@ return; SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2f32)); + SDValue PassThru = + DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + Gather->getPassThru(), DAG.getUNDEF(MVT::v2f32)); if (!Subtarget.hasVLX()) { // We need to widen the mask, but the instruction will only use 2 // of its elements. So we can use undef. @@ -27915,11 +28101,11 @@ DAG.getUNDEF(MVT::v2i1)); Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); + DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, + Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); return; @@ -27929,9 +28115,9 @@ SDValue Index = Gather->getIndex(); SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2i32)); + SDValue PassThru = + DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, + Gather->getPassThru(), DAG.getUNDEF(MVT::v2i32)); // If the index is v2i64 we can use it directly. 
if (Index.getValueType() == MVT::v2i64 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { @@ -27942,11 +28128,11 @@ DAG.getUNDEF(MVT::v2i1)); Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); + DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, + Gather->getMemoryVT(), Gather->getMemOperand()); SDValue Chain = Res.getValue(2); if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, @@ -27957,15 +28143,15 @@ } if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { EVT IndexVT = Index.getValueType(); - EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(), - IndexVT.getScalarType(), 4); + EVT NewIndexVT = + EVT::getVectorVT(*DAG.getContext(), IndexVT.getScalarType(), 4); // Otherwise we need to custom widen everything to avoid promotion. Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, DAG.getUNDEF(IndexVT)); Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other), Gather->getMemoryVT(), dl, Ops, Gather->getMemOperand()); @@ -27993,8 +28179,7 @@ auto *Ld = cast(N); MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? 
MVT::i64 : MVT::f64; SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), - Ld->getAlignment(), + Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); SDValue Chain = Res.getValue(1); MVT WideVT = MVT::getVectorVT(LdVT, 2); @@ -28011,348 +28196,684 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((X86ISD::NodeType)Opcode) { - case X86ISD::FIRST_NUMBER: break; - case X86ISD::BSF: return "X86ISD::BSF"; - case X86ISD::BSR: return "X86ISD::BSR"; - case X86ISD::SHLD: return "X86ISD::SHLD"; - case X86ISD::SHRD: return "X86ISD::SHRD"; - case X86ISD::FAND: return "X86ISD::FAND"; - case X86ISD::FANDN: return "X86ISD::FANDN"; - case X86ISD::FOR: return "X86ISD::FOR"; - case X86ISD::FXOR: return "X86ISD::FXOR"; - case X86ISD::FILD: return "X86ISD::FILD"; - case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; - case X86ISD::FIST: return "X86ISD::FIST"; - case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; - case X86ISD::FLD: return "X86ISD::FLD"; - case X86ISD::FST: return "X86ISD::FST"; - case X86ISD::CALL: return "X86ISD::CALL"; - case X86ISD::BT: return "X86ISD::BT"; - case X86ISD::CMP: return "X86ISD::CMP"; - case X86ISD::COMI: return "X86ISD::COMI"; - case X86ISD::UCOMI: return "X86ISD::UCOMI"; - case X86ISD::CMPM: return "X86ISD::CMPM"; - case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE"; - case X86ISD::SETCC: return "X86ISD::SETCC"; - case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; - case X86ISD::FSETCC: return "X86ISD::FSETCC"; - case X86ISD::FSETCCM: return "X86ISD::FSETCCM"; - case X86ISD::FSETCCM_SAE: return "X86ISD::FSETCCM_SAE"; - case X86ISD::CMOV: return "X86ISD::CMOV"; - case X86ISD::BRCOND: return "X86ISD::BRCOND"; - case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; - case X86ISD::IRET: return "X86ISD::IRET"; - case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; - case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; - case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; - case X86ISD::Wrapper: return "X86ISD::Wrapper"; - case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; - case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; - case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; - case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; - case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; - case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; - case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; - case X86ISD::PINSRB: return "X86ISD::PINSRB"; - case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; - case X86ISD::ANDNP: return "X86ISD::ANDNP"; - case X86ISD::BLENDI: return "X86ISD::BLENDI"; - case X86ISD::BLENDV: return "X86ISD::BLENDV"; - case X86ISD::HADD: return "X86ISD::HADD"; - case X86ISD::HSUB: return "X86ISD::HSUB"; - case X86ISD::FHADD: return "X86ISD::FHADD"; - case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; - case X86ISD::FMAX: return "X86ISD::FMAX"; - case X86ISD::FMAXS: return "X86ISD::FMAXS"; - case X86ISD::FMAX_SAE: return "X86ISD::FMAX_SAE"; - case X86ISD::FMAXS_SAE: return "X86ISD::FMAXS_SAE"; - case X86ISD::FMIN: return "X86ISD::FMIN"; - case X86ISD::FMINS: return "X86ISD::FMINS"; - case X86ISD::FMIN_SAE: return "X86ISD::FMIN_SAE"; - case X86ISD::FMINS_SAE: return "X86ISD::FMINS_SAE"; - case X86ISD::FMAXC: return "X86ISD::FMAXC"; - case X86ISD::FMINC: return "X86ISD::FMINC"; - case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; - case X86ISD::FRCP: return "X86ISD::FRCP"; - case 
X86ISD::EXTRQI: return "X86ISD::EXTRQI"; - case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; - case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; - case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; - case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; - case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; - case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP"; + case X86ISD::FIRST_NUMBER: + break; + case X86ISD::BSF: + return "X86ISD::BSF"; + case X86ISD::BSR: + return "X86ISD::BSR"; + case X86ISD::SHLD: + return "X86ISD::SHLD"; + case X86ISD::SHRD: + return "X86ISD::SHRD"; + case X86ISD::FAND: + return "X86ISD::FAND"; + case X86ISD::FANDN: + return "X86ISD::FANDN"; + case X86ISD::FOR: + return "X86ISD::FOR"; + case X86ISD::FXOR: + return "X86ISD::FXOR"; + case X86ISD::FILD: + return "X86ISD::FILD"; + case X86ISD::FILD_FLAG: + return "X86ISD::FILD_FLAG"; + case X86ISD::FIST: + return "X86ISD::FIST"; + case X86ISD::FP_TO_INT_IN_MEM: + return "X86ISD::FP_TO_INT_IN_MEM"; + case X86ISD::FLD: + return "X86ISD::FLD"; + case X86ISD::FST: + return "X86ISD::FST"; + case X86ISD::CALL: + return "X86ISD::CALL"; + case X86ISD::BT: + return "X86ISD::BT"; + case X86ISD::CMP: + return "X86ISD::CMP"; + case X86ISD::COMI: + return "X86ISD::COMI"; + case X86ISD::UCOMI: + return "X86ISD::UCOMI"; + case X86ISD::CMPM: + return "X86ISD::CMPM"; + case X86ISD::CMPM_SAE: + return "X86ISD::CMPM_SAE"; + case X86ISD::SETCC: + return "X86ISD::SETCC"; + case X86ISD::SETCC_CARRY: + return "X86ISD::SETCC_CARRY"; + case X86ISD::FSETCC: + return "X86ISD::FSETCC"; + case X86ISD::FSETCCM: + return "X86ISD::FSETCCM"; + case X86ISD::FSETCCM_SAE: + return "X86ISD::FSETCCM_SAE"; + case X86ISD::CMOV: + return "X86ISD::CMOV"; + case X86ISD::BRCOND: + return "X86ISD::BRCOND"; + case X86ISD::RET_FLAG: + return "X86ISD::RET_FLAG"; + case X86ISD::IRET: + return "X86ISD::IRET"; + case X86ISD::REP_STOS: + return "X86ISD::REP_STOS"; + case X86ISD::REP_MOVS: + return "X86ISD::REP_MOVS"; + case X86ISD::GlobalBaseReg: + return "X86ISD::GlobalBaseReg"; + case X86ISD::Wrapper: + return "X86ISD::Wrapper"; + case X86ISD::WrapperRIP: + return "X86ISD::WrapperRIP"; + case X86ISD::MOVDQ2Q: + return "X86ISD::MOVDQ2Q"; + case X86ISD::MMX_MOVD2W: + return "X86ISD::MMX_MOVD2W"; + case X86ISD::MMX_MOVW2D: + return "X86ISD::MMX_MOVW2D"; + case X86ISD::PEXTRB: + return "X86ISD::PEXTRB"; + case X86ISD::PEXTRW: + return "X86ISD::PEXTRW"; + case X86ISD::INSERTPS: + return "X86ISD::INSERTPS"; + case X86ISD::PINSRB: + return "X86ISD::PINSRB"; + case X86ISD::PINSRW: + return "X86ISD::PINSRW"; + case X86ISD::PSHUFB: + return "X86ISD::PSHUFB"; + case X86ISD::ANDNP: + return "X86ISD::ANDNP"; + case X86ISD::BLENDI: + return "X86ISD::BLENDI"; + case X86ISD::BLENDV: + return "X86ISD::BLENDV"; + case X86ISD::HADD: + return "X86ISD::HADD"; + case X86ISD::HSUB: + return "X86ISD::HSUB"; + case X86ISD::FHADD: + return "X86ISD::FHADD"; + case X86ISD::FHSUB: + return "X86ISD::FHSUB"; + case X86ISD::CONFLICT: + return "X86ISD::CONFLICT"; + case X86ISD::FMAX: + return "X86ISD::FMAX"; + case X86ISD::FMAXS: + return "X86ISD::FMAXS"; + case X86ISD::FMAX_SAE: + return "X86ISD::FMAX_SAE"; + case X86ISD::FMAXS_SAE: + return "X86ISD::FMAXS_SAE"; + case X86ISD::FMIN: + return "X86ISD::FMIN"; + case X86ISD::FMINS: + return "X86ISD::FMINS"; + case X86ISD::FMIN_SAE: + return "X86ISD::FMIN_SAE"; + case X86ISD::FMINS_SAE: + return "X86ISD::FMINS_SAE"; + case X86ISD::FMAXC: + return "X86ISD::FMAXC"; + case X86ISD::FMINC: + return "X86ISD::FMINC"; + case X86ISD::FRSQRT: + 
return "X86ISD::FRSQRT"; + case X86ISD::FRCP: + return "X86ISD::FRCP"; + case X86ISD::EXTRQI: + return "X86ISD::EXTRQI"; + case X86ISD::INSERTQI: + return "X86ISD::INSERTQI"; + case X86ISD::TLSADDR: + return "X86ISD::TLSADDR"; + case X86ISD::TLSBASEADDR: + return "X86ISD::TLSBASEADDR"; + case X86ISD::TLSCALL: + return "X86ISD::TLSCALL"; + case X86ISD::EH_SJLJ_SETJMP: + return "X86ISD::EH_SJLJ_SETJMP"; + case X86ISD::EH_SJLJ_LONGJMP: + return "X86ISD::EH_SJLJ_LONGJMP"; case X86ISD::EH_SJLJ_SETUP_DISPATCH: return "X86ISD::EH_SJLJ_SETUP_DISPATCH"; - case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; - case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; - case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; - case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; - case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; - case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; - case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG"; + case X86ISD::EH_RETURN: + return "X86ISD::EH_RETURN"; + case X86ISD::TC_RETURN: + return "X86ISD::TC_RETURN"; + case X86ISD::FNSTCW16m: + return "X86ISD::FNSTCW16m"; + case X86ISD::FNSTSW16r: + return "X86ISD::FNSTSW16r"; + case X86ISD::LCMPXCHG_DAG: + return "X86ISD::LCMPXCHG_DAG"; + case X86ISD::LCMPXCHG8_DAG: + return "X86ISD::LCMPXCHG8_DAG"; + case X86ISD::LCMPXCHG16_DAG: + return "X86ISD::LCMPXCHG16_DAG"; case X86ISD::LCMPXCHG8_SAVE_EBX_DAG: return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG"; case X86ISD::LCMPXCHG16_SAVE_RBX_DAG: return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG"; - case X86ISD::LADD: return "X86ISD::LADD"; - case X86ISD::LSUB: return "X86ISD::LSUB"; - case X86ISD::LOR: return "X86ISD::LOR"; - case X86ISD::LXOR: return "X86ISD::LXOR"; - case X86ISD::LAND: return "X86ISD::LAND"; - case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; - case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; - case X86ISD::VEXTRACT_STORE: return "X86ISD::VEXTRACT_STORE"; - case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; - case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; - case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; - case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC"; - case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS"; - case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS"; - case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES"; - case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS"; - case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; - case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; - case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; - case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE"; - case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS"; - case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE"; - case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; - case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; - case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; - case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS"; - case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; - case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; - case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; - case X86ISD::VSHL: return "X86ISD::VSHL"; - case X86ISD::VSRL: return "X86ISD::VSRL"; - case X86ISD::VSRA: return "X86ISD::VSRA"; - case X86ISD::VSHLI: return "X86ISD::VSHLI"; - case X86ISD::VSRLI: return "X86ISD::VSRLI"; - case X86ISD::VSRAI: return "X86ISD::VSRAI"; - case X86ISD::VSHLV: return "X86ISD::VSHLV"; - case X86ISD::VSRLV: return "X86ISD::VSRLV"; - case X86ISD::VSRAV: return "X86ISD::VSRAV"; - case X86ISD::VROTLI: return "X86ISD::VROTLI"; - case X86ISD::VROTRI: return 
"X86ISD::VROTRI"; - case X86ISD::VPPERM: return "X86ISD::VPPERM"; - case X86ISD::CMPP: return "X86ISD::CMPP"; - case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; - case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; - case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS"; - case X86ISD::ADD: return "X86ISD::ADD"; - case X86ISD::SUB: return "X86ISD::SUB"; - case X86ISD::ADC: return "X86ISD::ADC"; - case X86ISD::SBB: return "X86ISD::SBB"; - case X86ISD::SMUL: return "X86ISD::SMUL"; - case X86ISD::UMUL: return "X86ISD::UMUL"; - case X86ISD::OR: return "X86ISD::OR"; - case X86ISD::XOR: return "X86ISD::XOR"; - case X86ISD::AND: return "X86ISD::AND"; - case X86ISD::BEXTR: return "X86ISD::BEXTR"; - case X86ISD::BZHI: return "X86ISD::BZHI"; - case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; - case X86ISD::MOVMSK: return "X86ISD::MOVMSK"; - case X86ISD::PTEST: return "X86ISD::PTEST"; - case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::KORTEST: return "X86ISD::KORTEST"; - case X86ISD::KTEST: return "X86ISD::KTEST"; - case X86ISD::KADD: return "X86ISD::KADD"; - case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL"; - case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR"; - case X86ISD::PACKSS: return "X86ISD::PACKSS"; - case X86ISD::PACKUS: return "X86ISD::PACKUS"; - case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; - case X86ISD::VALIGN: return "X86ISD::VALIGN"; - case X86ISD::VSHLD: return "X86ISD::VSHLD"; - case X86ISD::VSHRD: return "X86ISD::VSHRD"; - case X86ISD::VSHLDV: return "X86ISD::VSHLDV"; - case X86ISD::VSHRDV: return "X86ISD::VSHRDV"; - case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; - case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; - case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; - case X86ISD::SHUFP: return "X86ISD::SHUFP"; - case X86ISD::SHUF128: return "X86ISD::SHUF128"; - case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; - case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; - case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; - case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; - case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; - case X86ISD::MOVSD: return "X86ISD::MOVSD"; - case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; - case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; - case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; - case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; - case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; - case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; - case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; - case X86ISD::VPERMV: return "X86ISD::VPERMV"; - case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; - case X86ISD::VPERMI: return "X86ISD::VPERMI"; - case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; - case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; - case X86ISD::VFIXUPIMM_SAE: return "X86ISD::VFIXUPIMM_SAE"; - case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS"; - case X86ISD::VFIXUPIMMS_SAE: return "X86ISD::VFIXUPIMMS_SAE"; - case X86ISD::VRANGE: return "X86ISD::VRANGE"; - case X86ISD::VRANGE_SAE: return "X86ISD::VRANGE_SAE"; - case X86ISD::VRANGES: return "X86ISD::VRANGES"; - case X86ISD::VRANGES_SAE: return "X86ISD::VRANGES_SAE"; - case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; - case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; - case X86ISD::PSADBW: return "X86ISD::PSADBW"; - case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW"; - case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; - case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; - case 
X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; - case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; - case X86ISD::MFENCE: return "X86ISD::MFENCE"; - case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; - case X86ISD::SAHF: return "X86ISD::SAHF"; - case X86ISD::RDRAND: return "X86ISD::RDRAND"; - case X86ISD::RDSEED: return "X86ISD::RDSEED"; - case X86ISD::RDPKRU: return "X86ISD::RDPKRU"; - case X86ISD::WRPKRU: return "X86ISD::WRPKRU"; - case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW"; - case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD"; - case X86ISD::VPSHA: return "X86ISD::VPSHA"; - case X86ISD::VPSHL: return "X86ISD::VPSHL"; - case X86ISD::VPCOM: return "X86ISD::VPCOM"; - case X86ISD::VPCOMU: return "X86ISD::VPCOMU"; - case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2"; - case X86ISD::FMSUB: return "X86ISD::FMSUB"; - case X86ISD::FNMADD: return "X86ISD::FNMADD"; - case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; - case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; - case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; - case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND"; - case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND"; - case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND"; - case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; - case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; - case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; - case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H"; - case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L"; - case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; - case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE"; - case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES"; - case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE"; - case X86ISD::VREDUCE: return "X86ISD::VREDUCE"; - case X86ISD::VREDUCE_SAE: return "X86ISD::VREDUCE_SAE"; - case X86ISD::VREDUCES: return "X86ISD::VREDUCES"; - case X86ISD::VREDUCES_SAE: return "X86ISD::VREDUCES_SAE"; - case X86ISD::VGETMANT: return "X86ISD::VGETMANT"; - case X86ISD::VGETMANT_SAE: return "X86ISD::VGETMANT_SAE"; - case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS"; - case X86ISD::VGETMANTS_SAE: return "X86ISD::VGETMANTS_SAE"; - case X86ISD::PCMPESTR: return "X86ISD::PCMPESTR"; - case X86ISD::PCMPISTR: return "X86ISD::PCMPISTR"; - case X86ISD::XTEST: return "X86ISD::XTEST"; - case X86ISD::COMPRESS: return "X86ISD::COMPRESS"; - case X86ISD::EXPAND: return "X86ISD::EXPAND"; - case X86ISD::SELECTS: return "X86ISD::SELECTS"; - case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; - case X86ISD::RCP14: return "X86ISD::RCP14"; - case X86ISD::RCP14S: return "X86ISD::RCP14S"; - case X86ISD::RCP28: return "X86ISD::RCP28"; - case X86ISD::RCP28_SAE: return "X86ISD::RCP28_SAE"; - case X86ISD::RCP28S: return "X86ISD::RCP28S"; - case X86ISD::RCP28S_SAE: return "X86ISD::RCP28S_SAE"; - case X86ISD::EXP2: return "X86ISD::EXP2"; - case X86ISD::EXP2_SAE: return "X86ISD::EXP2_SAE"; - case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; - case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; - case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; - case X86ISD::RSQRT28_SAE: return "X86ISD::RSQRT28_SAE"; - case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; - case X86ISD::RSQRT28S_SAE: return "X86ISD::RSQRT28S_SAE"; - case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; - case X86ISD::FADDS: return "X86ISD::FADDS"; - case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; - case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; - case X86ISD::FSUBS: return "X86ISD::FSUBS"; - case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND"; - case 
X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; - case X86ISD::FMULS: return "X86ISD::FMULS"; - case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND"; - case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; - case X86ISD::FDIVS: return "X86ISD::FDIVS"; - case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND"; - case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; - case X86ISD::FSQRTS: return "X86ISD::FSQRTS"; - case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; - case X86ISD::FGETEXP: return "X86ISD::FGETEXP"; - case X86ISD::FGETEXP_SAE: return "X86ISD::FGETEXP_SAE"; - case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS"; - case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE"; - case X86ISD::SCALEF: return "X86ISD::SCALEF"; - case X86ISD::SCALEF_RND: return "X86ISD::SCALEF_RND"; - case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; - case X86ISD::SCALEFS_RND: return "X86ISD::SCALEFS_RND"; - case X86ISD::AVG: return "X86ISD::AVG"; - case X86ISD::MULHRS: return "X86ISD::MULHRS"; - case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; - case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; - case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI"; - case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; - case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; - case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; - case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE"; - case X86ISD::CVTTP2UI_SAE: return "X86ISD::CVTTP2UI_SAE"; - case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI"; - case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI"; - case X86ISD::CVTTS2SI_SAE: return "X86ISD::CVTTS2SI_SAE"; - case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE"; - case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P"; - case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P"; - case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P"; - case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P"; - case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; - case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; - case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT"; - case X86ISD::SCALAR_SINT_TO_FP: return "X86ISD::SCALAR_SINT_TO_FP"; - case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND"; - case X86ISD::SCALAR_UINT_TO_FP: return "X86ISD::SCALAR_UINT_TO_FP"; - case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND"; - case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH"; - case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH"; - case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS"; - case X86ISD::CVTPH2PS_SAE: return "X86ISD::CVTPH2PS_SAE"; - case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI"; - case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI"; - case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI"; - case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI"; - case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND"; - case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; - case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI"; - case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI"; - case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; - case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; - case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16"; - case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16"; - case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16"; - case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS"; - case X86ISD::LWPINS: return "X86ISD::LWPINS"; - case X86ISD::MGATHER: return "X86ISD::MGATHER"; - case X86ISD::MSCATTER: return "X86ISD::MSCATTER"; - case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD"; - case X86ISD::VPDPBUSDS: return 
"X86ISD::VPDPBUSDS"; - case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; - case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; - case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB"; - case X86ISD::GF2P8MULB: return "X86ISD::GF2P8MULB"; - case X86ISD::GF2P8AFFINEQB: return "X86ISD::GF2P8AFFINEQB"; - case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB"; - case X86ISD::NT_CALL: return "X86ISD::NT_CALL"; - case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND"; - case X86ISD::UMWAIT: return "X86ISD::UMWAIT"; - case X86ISD::TPAUSE: return "X86ISD::TPAUSE"; - case X86ISD::ENQCMD: return "X86ISD:ENQCMD"; - case X86ISD::ENQCMDS: return "X86ISD:ENQCMDS"; - case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT"; + case X86ISD::LADD: + return "X86ISD::LADD"; + case X86ISD::LSUB: + return "X86ISD::LSUB"; + case X86ISD::LOR: + return "X86ISD::LOR"; + case X86ISD::LXOR: + return "X86ISD::LXOR"; + case X86ISD::LAND: + return "X86ISD::LAND"; + case X86ISD::VZEXT_MOVL: + return "X86ISD::VZEXT_MOVL"; + case X86ISD::VZEXT_LOAD: + return "X86ISD::VZEXT_LOAD"; + case X86ISD::VEXTRACT_STORE: + return "X86ISD::VEXTRACT_STORE"; + case X86ISD::VTRUNC: + return "X86ISD::VTRUNC"; + case X86ISD::VTRUNCS: + return "X86ISD::VTRUNCS"; + case X86ISD::VTRUNCUS: + return "X86ISD::VTRUNCUS"; + case X86ISD::VMTRUNC: + return "X86ISD::VMTRUNC"; + case X86ISD::VMTRUNCS: + return "X86ISD::VMTRUNCS"; + case X86ISD::VMTRUNCUS: + return "X86ISD::VMTRUNCUS"; + case X86ISD::VTRUNCSTORES: + return "X86ISD::VTRUNCSTORES"; + case X86ISD::VTRUNCSTOREUS: + return "X86ISD::VTRUNCSTOREUS"; + case X86ISD::VMTRUNCSTORES: + return "X86ISD::VMTRUNCSTORES"; + case X86ISD::VMTRUNCSTOREUS: + return "X86ISD::VMTRUNCSTOREUS"; + case X86ISD::VFPEXT: + return "X86ISD::VFPEXT"; + case X86ISD::VFPEXT_SAE: + return "X86ISD::VFPEXT_SAE"; + case X86ISD::VFPEXTS: + return "X86ISD::VFPEXTS"; + case X86ISD::VFPEXTS_SAE: + return "X86ISD::VFPEXTS_SAE"; + case X86ISD::VFPROUND: + return "X86ISD::VFPROUND"; + case X86ISD::VMFPROUND: + return "X86ISD::VMFPROUND"; + case X86ISD::VFPROUND_RND: + return "X86ISD::VFPROUND_RND"; + case X86ISD::VFPROUNDS: + return "X86ISD::VFPROUNDS"; + case X86ISD::VFPROUNDS_RND: + return "X86ISD::VFPROUNDS_RND"; + case X86ISD::VSHLDQ: + return "X86ISD::VSHLDQ"; + case X86ISD::VSRLDQ: + return "X86ISD::VSRLDQ"; + case X86ISD::VSHL: + return "X86ISD::VSHL"; + case X86ISD::VSRL: + return "X86ISD::VSRL"; + case X86ISD::VSRA: + return "X86ISD::VSRA"; + case X86ISD::VSHLI: + return "X86ISD::VSHLI"; + case X86ISD::VSRLI: + return "X86ISD::VSRLI"; + case X86ISD::VSRAI: + return "X86ISD::VSRAI"; + case X86ISD::VSHLV: + return "X86ISD::VSHLV"; + case X86ISD::VSRLV: + return "X86ISD::VSRLV"; + case X86ISD::VSRAV: + return "X86ISD::VSRAV"; + case X86ISD::VROTLI: + return "X86ISD::VROTLI"; + case X86ISD::VROTRI: + return "X86ISD::VROTRI"; + case X86ISD::VPPERM: + return "X86ISD::VPPERM"; + case X86ISD::CMPP: + return "X86ISD::CMPP"; + case X86ISD::PCMPEQ: + return "X86ISD::PCMPEQ"; + case X86ISD::PCMPGT: + return "X86ISD::PCMPGT"; + case X86ISD::PHMINPOS: + return "X86ISD::PHMINPOS"; + case X86ISD::ADD: + return "X86ISD::ADD"; + case X86ISD::SUB: + return "X86ISD::SUB"; + case X86ISD::ADC: + return "X86ISD::ADC"; + case X86ISD::SBB: + return "X86ISD::SBB"; + case X86ISD::SMUL: + return "X86ISD::SMUL"; + case X86ISD::UMUL: + return "X86ISD::UMUL"; + case X86ISD::OR: + return "X86ISD::OR"; + case X86ISD::XOR: + return "X86ISD::XOR"; + case X86ISD::AND: + return "X86ISD::AND"; + case X86ISD::BEXTR: + return "X86ISD::BEXTR"; + case 
X86ISD::BZHI: + return "X86ISD::BZHI"; + case X86ISD::MUL_IMM: + return "X86ISD::MUL_IMM"; + case X86ISD::MOVMSK: + return "X86ISD::MOVMSK"; + case X86ISD::PTEST: + return "X86ISD::PTEST"; + case X86ISD::TESTP: + return "X86ISD::TESTP"; + case X86ISD::KORTEST: + return "X86ISD::KORTEST"; + case X86ISD::KTEST: + return "X86ISD::KTEST"; + case X86ISD::KADD: + return "X86ISD::KADD"; + case X86ISD::KSHIFTL: + return "X86ISD::KSHIFTL"; + case X86ISD::KSHIFTR: + return "X86ISD::KSHIFTR"; + case X86ISD::PACKSS: + return "X86ISD::PACKSS"; + case X86ISD::PACKUS: + return "X86ISD::PACKUS"; + case X86ISD::PALIGNR: + return "X86ISD::PALIGNR"; + case X86ISD::VALIGN: + return "X86ISD::VALIGN"; + case X86ISD::VSHLD: + return "X86ISD::VSHLD"; + case X86ISD::VSHRD: + return "X86ISD::VSHRD"; + case X86ISD::VSHLDV: + return "X86ISD::VSHLDV"; + case X86ISD::VSHRDV: + return "X86ISD::VSHRDV"; + case X86ISD::PSHUFD: + return "X86ISD::PSHUFD"; + case X86ISD::PSHUFHW: + return "X86ISD::PSHUFHW"; + case X86ISD::PSHUFLW: + return "X86ISD::PSHUFLW"; + case X86ISD::SHUFP: + return "X86ISD::SHUFP"; + case X86ISD::SHUF128: + return "X86ISD::SHUF128"; + case X86ISD::MOVLHPS: + return "X86ISD::MOVLHPS"; + case X86ISD::MOVHLPS: + return "X86ISD::MOVHLPS"; + case X86ISD::MOVDDUP: + return "X86ISD::MOVDDUP"; + case X86ISD::MOVSHDUP: + return "X86ISD::MOVSHDUP"; + case X86ISD::MOVSLDUP: + return "X86ISD::MOVSLDUP"; + case X86ISD::MOVSD: + return "X86ISD::MOVSD"; + case X86ISD::MOVSS: + return "X86ISD::MOVSS"; + case X86ISD::UNPCKL: + return "X86ISD::UNPCKL"; + case X86ISD::UNPCKH: + return "X86ISD::UNPCKH"; + case X86ISD::VBROADCAST: + return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCASTM: + return "X86ISD::VBROADCASTM"; + case X86ISD::SUBV_BROADCAST: + return "X86ISD::SUBV_BROADCAST"; + case X86ISD::VPERMILPV: + return "X86ISD::VPERMILPV"; + case X86ISD::VPERMILPI: + return "X86ISD::VPERMILPI"; + case X86ISD::VPERM2X128: + return "X86ISD::VPERM2X128"; + case X86ISD::VPERMV: + return "X86ISD::VPERMV"; + case X86ISD::VPERMV3: + return "X86ISD::VPERMV3"; + case X86ISD::VPERMI: + return "X86ISD::VPERMI"; + case X86ISD::VPTERNLOG: + return "X86ISD::VPTERNLOG"; + case X86ISD::VFIXUPIMM: + return "X86ISD::VFIXUPIMM"; + case X86ISD::VFIXUPIMM_SAE: + return "X86ISD::VFIXUPIMM_SAE"; + case X86ISD::VFIXUPIMMS: + return "X86ISD::VFIXUPIMMS"; + case X86ISD::VFIXUPIMMS_SAE: + return "X86ISD::VFIXUPIMMS_SAE"; + case X86ISD::VRANGE: + return "X86ISD::VRANGE"; + case X86ISD::VRANGE_SAE: + return "X86ISD::VRANGE_SAE"; + case X86ISD::VRANGES: + return "X86ISD::VRANGES"; + case X86ISD::VRANGES_SAE: + return "X86ISD::VRANGES_SAE"; + case X86ISD::PMULUDQ: + return "X86ISD::PMULUDQ"; + case X86ISD::PMULDQ: + return "X86ISD::PMULDQ"; + case X86ISD::PSADBW: + return "X86ISD::PSADBW"; + case X86ISD::DBPSADBW: + return "X86ISD::DBPSADBW"; + case X86ISD::VASTART_SAVE_XMM_REGS: + return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::VAARG_64: + return "X86ISD::VAARG_64"; + case X86ISD::WIN_ALLOCA: + return "X86ISD::WIN_ALLOCA"; + case X86ISD::MEMBARRIER: + return "X86ISD::MEMBARRIER"; + case X86ISD::MFENCE: + return "X86ISD::MFENCE"; + case X86ISD::SEG_ALLOCA: + return "X86ISD::SEG_ALLOCA"; + case X86ISD::SAHF: + return "X86ISD::SAHF"; + case X86ISD::RDRAND: + return "X86ISD::RDRAND"; + case X86ISD::RDSEED: + return "X86ISD::RDSEED"; + case X86ISD::RDPKRU: + return "X86ISD::RDPKRU"; + case X86ISD::WRPKRU: + return "X86ISD::WRPKRU"; + case X86ISD::VPMADDUBSW: + return "X86ISD::VPMADDUBSW"; + case X86ISD::VPMADDWD: + return "X86ISD::VPMADDWD"; + case 
X86ISD::VPSHA: + return "X86ISD::VPSHA"; + case X86ISD::VPSHL: + return "X86ISD::VPSHL"; + case X86ISD::VPCOM: + return "X86ISD::VPCOM"; + case X86ISD::VPCOMU: + return "X86ISD::VPCOMU"; + case X86ISD::VPERMIL2: + return "X86ISD::VPERMIL2"; + case X86ISD::FMSUB: + return "X86ISD::FMSUB"; + case X86ISD::FNMADD: + return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: + return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: + return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: + return "X86ISD::FMSUBADD"; + case X86ISD::FMADD_RND: + return "X86ISD::FMADD_RND"; + case X86ISD::FNMADD_RND: + return "X86ISD::FNMADD_RND"; + case X86ISD::FMSUB_RND: + return "X86ISD::FMSUB_RND"; + case X86ISD::FNMSUB_RND: + return "X86ISD::FNMSUB_RND"; + case X86ISD::FMADDSUB_RND: + return "X86ISD::FMADDSUB_RND"; + case X86ISD::FMSUBADD_RND: + return "X86ISD::FMSUBADD_RND"; + case X86ISD::VPMADD52H: + return "X86ISD::VPMADD52H"; + case X86ISD::VPMADD52L: + return "X86ISD::VPMADD52L"; + case X86ISD::VRNDSCALE: + return "X86ISD::VRNDSCALE"; + case X86ISD::VRNDSCALE_SAE: + return "X86ISD::VRNDSCALE_SAE"; + case X86ISD::VRNDSCALES: + return "X86ISD::VRNDSCALES"; + case X86ISD::VRNDSCALES_SAE: + return "X86ISD::VRNDSCALES_SAE"; + case X86ISD::VREDUCE: + return "X86ISD::VREDUCE"; + case X86ISD::VREDUCE_SAE: + return "X86ISD::VREDUCE_SAE"; + case X86ISD::VREDUCES: + return "X86ISD::VREDUCES"; + case X86ISD::VREDUCES_SAE: + return "X86ISD::VREDUCES_SAE"; + case X86ISD::VGETMANT: + return "X86ISD::VGETMANT"; + case X86ISD::VGETMANT_SAE: + return "X86ISD::VGETMANT_SAE"; + case X86ISD::VGETMANTS: + return "X86ISD::VGETMANTS"; + case X86ISD::VGETMANTS_SAE: + return "X86ISD::VGETMANTS_SAE"; + case X86ISD::PCMPESTR: + return "X86ISD::PCMPESTR"; + case X86ISD::PCMPISTR: + return "X86ISD::PCMPISTR"; + case X86ISD::XTEST: + return "X86ISD::XTEST"; + case X86ISD::COMPRESS: + return "X86ISD::COMPRESS"; + case X86ISD::EXPAND: + return "X86ISD::EXPAND"; + case X86ISD::SELECTS: + return "X86ISD::SELECTS"; + case X86ISD::ADDSUB: + return "X86ISD::ADDSUB"; + case X86ISD::RCP14: + return "X86ISD::RCP14"; + case X86ISD::RCP14S: + return "X86ISD::RCP14S"; + case X86ISD::RCP28: + return "X86ISD::RCP28"; + case X86ISD::RCP28_SAE: + return "X86ISD::RCP28_SAE"; + case X86ISD::RCP28S: + return "X86ISD::RCP28S"; + case X86ISD::RCP28S_SAE: + return "X86ISD::RCP28S_SAE"; + case X86ISD::EXP2: + return "X86ISD::EXP2"; + case X86ISD::EXP2_SAE: + return "X86ISD::EXP2_SAE"; + case X86ISD::RSQRT14: + return "X86ISD::RSQRT14"; + case X86ISD::RSQRT14S: + return "X86ISD::RSQRT14S"; + case X86ISD::RSQRT28: + return "X86ISD::RSQRT28"; + case X86ISD::RSQRT28_SAE: + return "X86ISD::RSQRT28_SAE"; + case X86ISD::RSQRT28S: + return "X86ISD::RSQRT28S"; + case X86ISD::RSQRT28S_SAE: + return "X86ISD::RSQRT28S_SAE"; + case X86ISD::FADD_RND: + return "X86ISD::FADD_RND"; + case X86ISD::FADDS: + return "X86ISD::FADDS"; + case X86ISD::FADDS_RND: + return "X86ISD::FADDS_RND"; + case X86ISD::FSUB_RND: + return "X86ISD::FSUB_RND"; + case X86ISD::FSUBS: + return "X86ISD::FSUBS"; + case X86ISD::FSUBS_RND: + return "X86ISD::FSUBS_RND"; + case X86ISD::FMUL_RND: + return "X86ISD::FMUL_RND"; + case X86ISD::FMULS: + return "X86ISD::FMULS"; + case X86ISD::FMULS_RND: + return "X86ISD::FMULS_RND"; + case X86ISD::FDIV_RND: + return "X86ISD::FDIV_RND"; + case X86ISD::FDIVS: + return "X86ISD::FDIVS"; + case X86ISD::FDIVS_RND: + return "X86ISD::FDIVS_RND"; + case X86ISD::FSQRT_RND: + return "X86ISD::FSQRT_RND"; + case X86ISD::FSQRTS: + return "X86ISD::FSQRTS"; + case X86ISD::FSQRTS_RND: + return 
"X86ISD::FSQRTS_RND"; + case X86ISD::FGETEXP: + return "X86ISD::FGETEXP"; + case X86ISD::FGETEXP_SAE: + return "X86ISD::FGETEXP_SAE"; + case X86ISD::FGETEXPS: + return "X86ISD::FGETEXPS"; + case X86ISD::FGETEXPS_SAE: + return "X86ISD::FGETEXPS_SAE"; + case X86ISD::SCALEF: + return "X86ISD::SCALEF"; + case X86ISD::SCALEF_RND: + return "X86ISD::SCALEF_RND"; + case X86ISD::SCALEFS: + return "X86ISD::SCALEFS"; + case X86ISD::SCALEFS_RND: + return "X86ISD::SCALEFS_RND"; + case X86ISD::AVG: + return "X86ISD::AVG"; + case X86ISD::MULHRS: + return "X86ISD::MULHRS"; + case X86ISD::SINT_TO_FP_RND: + return "X86ISD::SINT_TO_FP_RND"; + case X86ISD::UINT_TO_FP_RND: + return "X86ISD::UINT_TO_FP_RND"; + case X86ISD::CVTTP2SI: + return "X86ISD::CVTTP2SI"; + case X86ISD::CVTTP2UI: + return "X86ISD::CVTTP2UI"; + case X86ISD::MCVTTP2SI: + return "X86ISD::MCVTTP2SI"; + case X86ISD::MCVTTP2UI: + return "X86ISD::MCVTTP2UI"; + case X86ISD::CVTTP2SI_SAE: + return "X86ISD::CVTTP2SI_SAE"; + case X86ISD::CVTTP2UI_SAE: + return "X86ISD::CVTTP2UI_SAE"; + case X86ISD::CVTTS2SI: + return "X86ISD::CVTTS2SI"; + case X86ISD::CVTTS2UI: + return "X86ISD::CVTTS2UI"; + case X86ISD::CVTTS2SI_SAE: + return "X86ISD::CVTTS2SI_SAE"; + case X86ISD::CVTTS2UI_SAE: + return "X86ISD::CVTTS2UI_SAE"; + case X86ISD::CVTSI2P: + return "X86ISD::CVTSI2P"; + case X86ISD::CVTUI2P: + return "X86ISD::CVTUI2P"; + case X86ISD::MCVTSI2P: + return "X86ISD::MCVTSI2P"; + case X86ISD::MCVTUI2P: + return "X86ISD::MCVTUI2P"; + case X86ISD::VFPCLASS: + return "X86ISD::VFPCLASS"; + case X86ISD::VFPCLASSS: + return "X86ISD::VFPCLASSS"; + case X86ISD::MULTISHIFT: + return "X86ISD::MULTISHIFT"; + case X86ISD::SCALAR_SINT_TO_FP: + return "X86ISD::SCALAR_SINT_TO_FP"; + case X86ISD::SCALAR_SINT_TO_FP_RND: + return "X86ISD::SCALAR_SINT_TO_FP_RND"; + case X86ISD::SCALAR_UINT_TO_FP: + return "X86ISD::SCALAR_UINT_TO_FP"; + case X86ISD::SCALAR_UINT_TO_FP_RND: + return "X86ISD::SCALAR_UINT_TO_FP_RND"; + case X86ISD::CVTPS2PH: + return "X86ISD::CVTPS2PH"; + case X86ISD::MCVTPS2PH: + return "X86ISD::MCVTPS2PH"; + case X86ISD::CVTPH2PS: + return "X86ISD::CVTPH2PS"; + case X86ISD::CVTPH2PS_SAE: + return "X86ISD::CVTPH2PS_SAE"; + case X86ISD::CVTP2SI: + return "X86ISD::CVTP2SI"; + case X86ISD::CVTP2UI: + return "X86ISD::CVTP2UI"; + case X86ISD::MCVTP2SI: + return "X86ISD::MCVTP2SI"; + case X86ISD::MCVTP2UI: + return "X86ISD::MCVTP2UI"; + case X86ISD::CVTP2SI_RND: + return "X86ISD::CVTP2SI_RND"; + case X86ISD::CVTP2UI_RND: + return "X86ISD::CVTP2UI_RND"; + case X86ISD::CVTS2SI: + return "X86ISD::CVTS2SI"; + case X86ISD::CVTS2UI: + return "X86ISD::CVTS2UI"; + case X86ISD::CVTS2SI_RND: + return "X86ISD::CVTS2SI_RND"; + case X86ISD::CVTS2UI_RND: + return "X86ISD::CVTS2UI_RND"; + case X86ISD::CVTNE2PS2BF16: + return "X86ISD::CVTNE2PS2BF16"; + case X86ISD::CVTNEPS2BF16: + return "X86ISD::CVTNEPS2BF16"; + case X86ISD::MCVTNEPS2BF16: + return "X86ISD::MCVTNEPS2BF16"; + case X86ISD::DPBF16PS: + return "X86ISD::DPBF16PS"; + case X86ISD::LWPINS: + return "X86ISD::LWPINS"; + case X86ISD::MGATHER: + return "X86ISD::MGATHER"; + case X86ISD::MSCATTER: + return "X86ISD::MSCATTER"; + case X86ISD::VPDPBUSD: + return "X86ISD::VPDPBUSD"; + case X86ISD::VPDPBUSDS: + return "X86ISD::VPDPBUSDS"; + case X86ISD::VPDPWSSD: + return "X86ISD::VPDPWSSD"; + case X86ISD::VPDPWSSDS: + return "X86ISD::VPDPWSSDS"; + case X86ISD::VPSHUFBITQMB: + return "X86ISD::VPSHUFBITQMB"; + case X86ISD::GF2P8MULB: + return "X86ISD::GF2P8MULB"; + case X86ISD::GF2P8AFFINEQB: + return "X86ISD::GF2P8AFFINEQB"; + case 
X86ISD::GF2P8AFFINEINVQB: + return "X86ISD::GF2P8AFFINEINVQB"; + case X86ISD::NT_CALL: + return "X86ISD::NT_CALL"; + case X86ISD::NT_BRIND: + return "X86ISD::NT_BRIND"; + case X86ISD::UMWAIT: + return "X86ISD::UMWAIT"; + case X86ISD::TPAUSE: + return "X86ISD::TPAUSE"; + case X86ISD::ENQCMD: + return "X86ISD:ENQCMD"; + case X86ISD::ENQCMDS: + return "X86ISD:ENQCMDS"; + case X86ISD::VP2INTERSECT: + return "X86ISD::VP2INTERSECT"; } return nullptr; } @@ -28404,7 +28925,7 @@ if (AM.HasBaseReg) return false; break; - default: // Other stuff never works. + default: // Other stuff never works. return false; } @@ -28431,7 +28952,7 @@ // AVX512BW has shifts such as vpsllvw. if (Subtarget.hasBWI() && Bits == 16) - return false; + return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a // fully general vector. @@ -28526,12 +29047,13 @@ if (Val.getOpcode() != ISD::LOAD) return false; - if (!VT1.isSimple() || !VT1.isInteger() || - !VT2.isSimple() || !VT2.isInteger()) + if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() || + !VT2.isInteger()) return false; switch (VT1.getSimpleVT().SimpleTy) { - default: break; + default: + break; case MVT::i8: case MVT::i16: case MVT::i32: @@ -28552,8 +29074,7 @@ return true; } -bool -X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { if (!Subtarget.hasAnyFMA()) return false; @@ -28693,15 +29214,15 @@ // sinkMBB: // DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB) BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) - .addReg(mainDstReg).addMBB(mainMBB) - .addReg(fallDstReg).addMBB(fallMBB); + .addReg(mainDstReg) + .addMBB(mainMBB) + .addReg(fallDstReg) + .addMBB(fallMBB); MI.eraseFromParent(); return sinkMBB; } - - MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -28754,8 +29275,8 @@ unsigned TotalNumXMMRegs = 8; bool UseGPOffset = (ArgMode == 1); bool UseFPOffset = (ArgMode == 2); - unsigned MaxOffset = TotalNumIntRegs * 8 + - (UseFPOffset ? TotalNumXMMRegs * 16 : 0); + unsigned MaxOffset = + TotalNumIntRegs * 8 + (UseFPOffset ? TotalNumXMMRegs * 16 : 0); /* Align ArgSize to a multiple of 8 */ unsigned ArgSizeA8 = (ArgSize + 7) & ~7; @@ -28766,8 +29287,8 @@ MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; - unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB - unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB + unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; if (!UseGPOffset && !UseFPOffset) { @@ -28834,13 +29355,14 @@ // Check if there is enough room left to pull this argument. BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) - .addReg(OffsetReg) - .addImm(MaxOffset + 8 - ArgSizeA8); + .addReg(OffsetReg) + .addImm(MaxOffset + 8 - ArgSizeA8); // Branch to "overflowMBB" if offset >= max // Fall through to "offsetMBB" otherwise BuildMI(thisMBB, DL, TII->get(X86::JCC_1)) - .addMBB(overflowMBB).addImm(X86::COND_AE); + .addMBB(overflowMBB) + .addImm(X86::COND_AE); } // In offsetMBB, emit code to use the reg_save_area. 
@@ -28859,21 +29381,21 @@ // Zero-extend the offset unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); // Add the offset to the reg_save_area to get the final address. BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) - .addReg(OffsetReg64) - .addReg(RegSaveReg); + .addReg(OffsetReg64) + .addReg(RegSaveReg); // Compute the offset for the next argument unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) - .addReg(OffsetReg) - .addImm(UseFPOffset ? 16 : 8); + .addReg(OffsetReg) + .addImm(UseFPOffset ? 16 : 8); // Store it back into the va_list. BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) @@ -28886,8 +29408,7 @@ .setMemRefs(MMOs); // Jump to endMBB - BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) - .addMBB(endMBB); + BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)).addMBB(endMBB); } // @@ -28913,23 +29434,23 @@ // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) - .addReg(OverflowAddrReg) - .addImm(Align-1); + .addReg(OverflowAddrReg) + .addImm(Align - 1); BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) - .addReg(TmpReg) - .addImm(~(uint64_t)(Align-1)); + .addReg(TmpReg) + .addImm(~(uint64_t)(Align - 1)); } else { BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) - .addReg(OverflowAddrReg); + .addReg(OverflowAddrReg); } // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) - .addReg(OverflowDestReg) - .addImm(ArgSizeA8); + .addReg(OverflowDestReg) + .addImm(ArgSizeA8); // Store the new overflow address. BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) @@ -28943,10 +29464,11 @@ // If we branched, emit the PHI to the front of endMBB. if (offsetMBB) { - BuildMI(*endMBB, endMBB->begin(), DL, - TII->get(X86::PHI), DestReg) - .addReg(OffsetDestReg).addMBB(offsetMBB) - .addReg(OverflowDestReg).addMBB(overflowMBB); + BuildMI(*endMBB, endMBB->begin(), DL, TII->get(X86::PHI), DestReg) + .addReg(OffsetDestReg) + .addMBB(offsetMBB) + .addReg(OverflowDestReg) + .addMBB(overflowMBB); } // Erase the pseudo instruction @@ -29036,12 +29558,12 @@ // kill marker, and set it if it should. Returns the correct kill // marker value. static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, - MachineBasicBlock* BB, - const TargetRegisterInfo* TRI) { + MachineBasicBlock *BB, + const TargetRegisterInfo *TRI) { // Scan forward through BB for a use/def of EFLAGS. 
MachineBasicBlock::iterator miI(std::next(SelectItr)); for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { - const MachineInstr& mi = *miI; + const MachineInstr &mi = *miI; if (mi.readsRegister(X86::EFLAGS)) return false; if (mi.definesRegister(X86::EFLAGS)) @@ -29054,7 +29576,7 @@ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), sEnd = BB->succ_end(); sItr != sEnd; ++sItr) { - MachineBasicBlock* succ = *sItr; + MachineBasicBlock *succ = *sItr; if (succ->isLiveIn(X86::EFLAGS)) return false; } @@ -29281,7 +29803,9 @@ X86::CondCode SecondCC = X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm()); - BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC); + BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)) + .addMBB(SinkMBB) + .addImm(SecondCC); // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] @@ -29514,56 +30038,61 @@ // Add code to the main basic block to check if the stack limit has been hit, // and if so, jump to mallocMBB otherwise to bumpMBB. BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); - BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) - .addReg(tmpSPVReg).addReg(sizeVReg); - BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr)) - .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) - .addReg(SPLimitVReg); + BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr), SPLimitVReg) + .addReg(tmpSPVReg) + .addReg(sizeVReg); + BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr : X86::CMP32mr)) + .addReg(0) + .addImm(1) + .addReg(0) + .addImm(TlsOffset) + .addReg(TlsReg) + .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G); // bumpMBB simply decreases the stack pointer, since we know the current // stacklet has enough space. BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) - .addReg(SPLimitVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) - .addReg(SPLimitVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. 
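The segmented-stack lowering above boils down to one comparison: subtract the requested size from the stack pointer and compare the result against the stacklet limit kept in thread-local storage; the bumpMBB path just commits the new stack pointer, while the mallocMBB path emitted below calls libgcc's __morestack_allocate_stack_space. A hedged C sketch of that control flow (the parameters stand in for the physical SP and the TLS limit slot, they are not names from the patch):

  #include <stddef.h>

  extern void *__morestack_allocate_stack_space(size_t size);

  static char *alloca_segmented(char *sp, char *tls_stack_limit, size_t size,
                                char **new_sp_out) {
    char *new_sp = sp - size;                  // SUB64rr / SUB32rr
    if (tls_stack_limit > new_sp)              // CMP64mr/CMP32mr + JCC COND_G
      return (char *)__morestack_allocate_stack_space(size);  // mallocMBB
    *new_sp_out = new_sp;                      // bumpMBB: commit the new SP
    return new_sp;
  }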
const uint32_t *RegMask = Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { - BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) - .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::RDI, RegState::Implicit) - .addReg(X86::RAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::RDI, RegState::Implicit) + .addReg(X86::RAX, RegState::ImplicitDefine); } else if (Is64Bit) { - BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI) - .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::EDI, RegState::Implicit) - .addReg(X86::EAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::EDI, RegState::Implicit) + .addReg(X86::EAX, RegState::ImplicitDefine); } else { - BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) - .addImm(12); + BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg) + .addReg(physSPReg) + .addImm(12); BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::EAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::EAX, RegState::ImplicitDefine); } if (!Is64Bit) - BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) - .addImm(16); + BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg) + .addReg(physSPReg) + .addImm(16); BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) - .addReg(IsLP64 ? X86::RAX : X86::EAX); + .addReg(IsLP64 ? X86::RAX : X86::EAX); BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Set up the CFG correctly. @@ -29650,7 +30179,7 @@ // Emit CALLSEQ_START right before the instruction. unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); MachineInstrBuilder CallseqStart = - BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); + BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); // Emit CALLSEQ_END right after the instruction. @@ -29658,7 +30187,7 @@ // original instruction around. unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); MachineInstrBuilder CallseqEnd = - BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); + BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); return BB; @@ -29682,9 +30211,10 @@ // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = - Subtarget.is64Bit() ? - Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : - Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); + Subtarget.is64Bit() + ? 
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() + : Subtarget.getRegisterInfo()->getCallPreservedMask(*F, + CallingConv::C); if (Subtarget.is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) @@ -29936,8 +30466,7 @@ MemOpndSlot = CurOp; MVT PVT = getPointerTy(MF->getDataLayout()); - assert((PVT == MVT::i64 || PVT == MVT::i32) && - "Invalid Pointer Size!"); + assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // @@ -29985,19 +30514,19 @@ LabelReg = MRI.createVirtualRegister(PtrRC); if (Subtarget.is64Bit()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) - .addReg(X86::RIP) - .addImm(0) - .addReg(0) - .addMBB(restoreMBB) - .addReg(0); + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB) + .addReg(0); } else { - const X86InstrInfo *XII = static_cast(TII); + const X86InstrInfo *XII = static_cast(TII); MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) - .addReg(XII->getGlobalBaseReg(MF)) - .addImm(0) - .addReg(0) - .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) - .addReg(0); + .addReg(XII->getGlobalBaseReg(MF)) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) + .addReg(0); } } else PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; @@ -30021,7 +30550,7 @@ // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) - .addMBB(restoreMBB); + .addMBB(restoreMBB); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MIB.addRegMask(RegInfo->getNoPreservedMask()); @@ -30034,10 +30563,11 @@ mainMBB->addSuccessor(sinkMBB); // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(X86::PHI), DstReg) - .addReg(mainDstReg).addMBB(mainMBB) - .addReg(restoreDstReg).addMBB(restoreMBB); + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) + .addReg(mainDstReg) + .addMBB(mainMBB) + .addReg(restoreDstReg) + .addMBB(restoreMBB); // restoreMBB: if (RegInfo->hasBasePointer(*MF)) { @@ -30048,9 +30578,9 @@ unsigned FramePtr = RegInfo->getFrameRegister(*MF); unsigned BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; - addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), - FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .setMIFlag(MachineInstr::FrameSetup); + addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, + true, X86FI->getRestoreBasePointerOffset()) + .setMIFlag(MachineInstr::FrameSetup); } BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); @@ -30146,7 +30676,9 @@ BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) .addReg(SSPCopyReg) .addReg(SSPCopyReg); - BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); + BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_E); checkSspMBB->addSuccessor(sinkMBB); checkSspMBB->addSuccessor(fallMBB); @@ -30176,7 +30708,9 @@ .addReg(SSPCopyReg); // Jump to sink in case PrevSSPReg <= SSPCopyReg. - BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE); + BuildMI(fallMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_BE); fallMBB->addSuccessor(sinkMBB); fallMBB->addSuccessor(fixShadowMBB); @@ -30199,7 +30733,9 @@ .addImm(8); // Jump if the result of the shift is zero. 
- BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); + BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_E); fixShadowMBB->addSuccessor(sinkMBB); fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); @@ -30234,7 +30770,9 @@ BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); // Jump if the counter is not zero yet. - BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE); + BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)) + .addMBB(fixShadowLoopMBB) + .addImm(X86::COND_NE); fixShadowLoopMBB->addSuccessor(sinkMBB); fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); @@ -30254,11 +30792,10 @@ MI.memoperands_end()); MVT PVT = getPointerTy(MF->getDataLayout()); - assert((PVT == MVT::i64 || PVT == MVT::i32) && - "Invalid Pointer Size!"); + assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = - (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; + (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -30479,7 +31016,9 @@ BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) .addReg(IReg) .addImm(LPadList.size()); - BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); + BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)) + .addMBB(TrapBB) + .addImm(X86::COND_AE); if (Subtarget.is64Bit()) { unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); @@ -30610,7 +31149,8 @@ DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { - default: llvm_unreachable("Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_base_addr32: @@ -30701,50 +31241,68 @@ // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. int OrigCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FNSTCW16m)), OrigCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), + OrigCWFrameIdx); // Load the old value of the control word... - unsigned OldCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + unsigned OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW), OrigCWFrameIdx); // OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero. - unsigned NewCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + unsigned NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW) - .addReg(OldCW, RegState::Kill).addImm(0xC00); + .addReg(OldCW, RegState::Kill) + .addImm(0xC00); // Extract to 16 bits. unsigned NewCW16 = - MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); + MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16) - .addReg(NewCW, RegState::Kill, X86::sub_16bit); + .addReg(NewCW, RegState::Kill, X86::sub_16bit); // Prepare memory for FLDCW. 
int NewCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), NewCWFrameIdx) - .addReg(NewCW16, RegState::Kill); + .addReg(NewCW16, RegState::Kill); // Reload the modified control word now... - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FLDCW16m)), NewCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), + NewCWFrameIdx); // Get the X86 opcode to use. unsigned Opc; switch (MI.getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; - case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; - case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; - case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; - case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; - case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; - case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; - case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; - case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; + default: + llvm_unreachable("illegal opcode!"); + case X86::FP32_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m32; + break; + case X86::FP32_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m32; + break; + case X86::FP32_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m32; + break; + case X86::FP64_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m64; + break; + case X86::FP64_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m64; + break; + case X86::FP64_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m64; + break; + case X86::FP80_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m80; + break; + case X86::FP80_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m80; + break; + case X86::FP80_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m80; + break; } X86AddressMode AM = getAddressFromInstr(&MI, 0); @@ -30752,8 +31310,8 @@ .addReg(MI.getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FLDCW16m)), OrigCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), + OrigCWFrameIdx); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -30861,10 +31419,8 @@ // X86 Optimization Hooks //===----------------------------------------------------------------------===// -bool -X86TargetLowering::targetShrinkDemandedConstant(SDValue Op, - const APInt &Demanded, - TargetLoweringOpt &TLO) const { +bool X86TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { // Only optimize Ands to prevent shrinking a constant that could be // matched by movzx. 
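The FP*_TO_INT*_IN_MEM expansion above exists because x87 stores round to nearest by default: the code spills the FPU control word with FNSTCW, ORs in 0xC00 (bits 10-11 are the rounding-control field, and 0b11 means truncate toward zero), reloads it with FLDCW around the IST_Fp* store, and finally restores the original word. The bit manipulation itself, as a sketch:

  #include <stdint.h>

  // cw stands for the 16-bit control word spilled by FNSTCW16m.
  static uint16_t with_round_toward_zero(uint16_t cw) {
    return (uint16_t)(cw | 0x0C00);  // RC field (bits 10-11) := 0b11
  }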
if (Op.getOpcode() != ISD::AND) @@ -30928,16 +31484,15 @@ unsigned BitWidth = Known.getBitWidth(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); - assert((Opc >= ISD::BUILTIN_OP_END || - Opc == ISD::INTRINSIC_WO_CHAIN || - Opc == ISD::INTRINSIC_W_CHAIN || - Opc == ISD::INTRINSIC_VOID) && + assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || + Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); Known.resetAll(); switch (Opc) { - default: break; + default: + break; case X86ISD::SETCC: Known.Zero.setBitsFrom(1); break; @@ -31032,11 +31587,11 @@ break; } case X86ISD::CMOV: { - Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1); + Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -31057,7 +31612,8 @@ unsigned NumElts = VT.getVectorNumElements(); if (Mask.size() == NumElts) { SmallVector DemandedOps(NumOps, APInt(NumElts, 0)); - Known.Zero.setAllBits(); Known.One.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) { if (!DemandedElts[i]) continue; @@ -31168,16 +31724,18 @@ case X86ISD::ANDNP: { unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (Tmp0 == 1) return 1; // Early out. + if (Tmp0 == 1) + return 1; // Early out. unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp0, Tmp1); } case X86ISD::CMOV: { - unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp0 == 1) return 1; // Early out. - unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); + unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp0 == 1) + return 1; // Early out. + unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1); return std::min(Tmp0, Tmp1); } } @@ -31225,8 +31783,8 @@ } if (Match) { unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize); - MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() : - MVT::getIntegerVT(MaskEltSize); + MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() + : MVT::getIntegerVT(MaskEltSize); SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize); if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) @@ -31480,7 +32038,8 @@ // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle. if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) || - ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) || + ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && + Subtarget.hasInt256()) || ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) { if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG, Subtarget)) { @@ -31791,8 +32350,9 @@ // Attempt to match against broadcast-from-vector. // Limit AVX1 to cases where we're loading+broadcasting a scalar element. 
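The X86ISD::CMOV case in computeKnownBitsForTargetNode above follows the usual rule for select-like nodes: a bit is known only if it is known, with the same value, in both possible results, which is what the two mask intersections express. A small worked example on 8-bit values:

  operand 1: 0b0000'11??   Zero = 0xF0, One = 0x0C
  operand 0: 0b0000'1??0   Zero = 0xF1, One = 0x08
  cmov:      Zero = 0xF0 & 0xF1 = 0xF0,  One = 0x0C & 0x08 = 0x08
             so the result is only known to look like 0b0000'1???.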
- if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) - && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) { + if ((Subtarget.hasAVX2() || + (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) && + (!IsEVEXShuffle || NumRootElts == NumMaskElts)) { SmallVector BroadcastMask(NumMaskElts, 0); if (isTargetShuffleEquivalent(Mask, BroadcastMask)) { if (V1.getValueType() == MaskVT && @@ -31855,9 +32415,9 @@ NewV1 = V1; // Save operands in case early exit happens. NewV2 = V2; - if (matchBinaryPermuteShuffle( - MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, - NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && + if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, + AllowIntDomain, NewV1, NewV2, DL, DAG, + Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! @@ -32431,10 +32991,9 @@ } // Ok, we have non-zero lanes, map them through to one of the Op's inputs. - unsigned OpMaskedIdx = - OpRatio == 1 - ? OpMask[OpIdx] - : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1)); + unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx] + : (OpMask[OpIdx] << OpRatioLog2) + + (RootMaskedIdx & (OpRatio - 1)); OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); int InputIdx = OpMask[OpIdx] / (int)OpMask.size(); @@ -32573,9 +33132,9 @@ /// We walk up the chain and look for a combinable shuffle, skipping over /// shuffles that we could hoist this shuffle's transformation past without /// altering anything. -static SDValue -combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, - SelectionDAG &DAG) { +static SDValue combineRedundantDWordShuffle(SDValue N, + MutableArrayRef Mask, + SelectionDAG &DAG) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); @@ -32970,8 +33529,7 @@ if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && - V.getOpcode() != N.getOpcode() && - V.hasOneUse()) { + V.getOpcode() != N.getOpcode() && V.hasOneUse()) { SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { SmallVector VMask = getPSHUFShuffleMask(V); @@ -33045,11 +33603,11 @@ /// operation. If true is returned then the operands of ADDSUB(SUBADD) operation /// are written to the parameters \p Opnd0 and \p Opnd1. /// -/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes -/// so it is easier to generically match. We also insert dummy vector shuffle -/// nodes for the operands which explicitly discard the lanes which are unused -/// by this operation to try to flow through the rest of the combiner -/// the fact that they're unused. +/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle +/// nodes so it is easier to generically match. We also insert dummy vector +/// shuffle nodes for the operands which explicitly discard the lanes which are +/// unused by this operation to try to flow through the rest of the combiner the +/// fact that they're unused. static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, bool &IsSubAdd) { @@ -33083,13 +33641,15 @@ // commute the FADD operands. 
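isAddSubOrSubAdd, described in the comment just above and implemented in the code that follows, matches the lane pattern behind x86's addsub instructions: every lane computes either a-b or a+b, alternating by lane parity, with both halves taken from the same two inputs. In scalar form the two shapes it distinguishes are roughly:

  // ADDSUB (e.g. addsubps): even lanes subtract, odd lanes add.
  static void addsub_ps(const float a[4], const float b[4], float out[4]) {
    for (int i = 0; i < 4; ++i)
      out[i] = (i & 1) ? a[i] + b[i] : a[i] - b[i];
  }

  // SUBADD: the mirrored form, matched when the even-parity source is the FADD.
  static void subadd_ps(const float a[4], const float b[4], float out[4]) {
    for (int i = 0; i < 4; ++i)
      out[i] = (i & 1) ? a[i] - b[i] : a[i] + b[i];
  }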
SDValue LHS, RHS; if (V1.getOpcode() == ISD::FSUB) { - LHS = V1->getOperand(0); RHS = V1->getOperand(1); + LHS = V1->getOperand(0); + RHS = V1->getOperand(1); if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) return false; } else { assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode"); - LHS = V2->getOperand(0); RHS = V2->getOperand(1); + LHS = V2->getOperand(0); + RHS = V2->getOperand(1); if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) && (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS)) return false; @@ -33101,8 +33661,8 @@ return false; // It's a subadd if the vector in the even parity is an FADD. - IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD - : V2->getOpcode() == ISD::FADD; + IsSubAdd = + Op0Even ? V1->getOpcode() == ISD::FADD : V2->getOpcode() == ISD::FADD; Opnd0 = LHS; Opnd1 = RHS; @@ -33270,8 +33830,8 @@ // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && - "Unexpected type for h-op"); + HOp.getValueType() == MVT::v4f64) && + HOp.getValueType() == VT && "Unexpected type for h-op"); return HOp; } @@ -33378,7 +33938,8 @@ TLI.isOperationLegal(Opcode, VT)) { bool CanFold = false; switch (Opcode) { - default : break; + default: + break; case ISD::ADD: case ISD::SUB: case ISD::MUL: @@ -33439,7 +34000,8 @@ // TODO - merge this into combineX86ShufflesRecursively. APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, + DCI)) return SDValue(N, 0); } @@ -33455,13 +34017,20 @@ switch (In.getOpcode()) { default: break; - case X86ISD::CVTP2SI: case X86ISD::CVTP2UI: - case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI: - case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI: - case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI: - case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: - case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P: - case X86ISD::VFPROUND: case X86ISD::VMFPROUND: + case X86ISD::CVTP2SI: + case X86ISD::CVTP2UI: + case X86ISD::MCVTP2SI: + case X86ISD::MCVTP2UI: + case X86ISD::CVTTP2SI: + case X86ISD::CVTTP2UI: + case X86ISD::MCVTTP2SI: + case X86ISD::MCVTTP2UI: + case X86ISD::CVTSI2P: + case X86ISD::CVTUI2P: + case X86ISD::MCVTSI2P: + case X86ISD::MCVTUI2P: + case X86ISD::VFPROUND: + case X86ISD::VMFPROUND: if (In.getOperand(0).getValueType() == MVT::v2f64 || In.getOperand(0).getValueType() == MVT::v2i64) return N->getOperand(0); // return the bitcast @@ -33480,18 +34049,18 @@ SDValue MULUDQ = BC.getOperand(0); ShuffleVectorSDNode *SVOp = cast(N); ArrayRef Mask = SVOp->getMask(); - if (BC.hasOneUse() && MULUDQ.hasOneUse() && - Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) { + if (BC.hasOneUse() && MULUDQ.hasOneUse() && Mask[0] == 0 && Mask[1] == 2 && + Mask[2] == -1 && Mask[3] == -1) { SDValue Op0 = MULUDQ.getOperand(0); SDValue Op1 = MULUDQ.getOperand(1); if (Op0.getOpcode() == ISD::BITCAST && Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op0.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp0 = - cast(Op0.getOperand(0)); + cast(Op0.getOperand(0)); ArrayRef Mask2 = SVOp0->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { + if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 
1 && + Mask2[3] == -1) { Op0 = SVOp0->getOperand(0); Op1 = DAG.getBitcast(MVT::v4i32, Op1); Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask); @@ -33502,10 +34071,10 @@ Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op1.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp1 = - cast(Op1.getOperand(0)); + cast(Op1.getOperand(0)); ArrayRef Mask2 = SVOp1->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { + if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 1 && + Mask2[3] == -1) { Op0 = DAG.getBitcast(MVT::v4i32, Op0); Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask); Op1 = SVOp1->getOperand(0); @@ -33741,8 +34310,7 @@ SDLoc DL(Op); SDValue Ext0 = extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = - TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); + SDValue ExtOp = TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); SDValue UndefVec = TLO.DAG.getUNDEF(VT); SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); @@ -33881,7 +34449,7 @@ EVT VT = Op.getValueType(); unsigned BitWidth = OriginalDemandedBits.getBitWidth(); unsigned Opc = Op.getOpcode(); - switch(Opc) { + switch (Opc) { case X86ISD::PMULDQ: case X86ISD::PMULUDQ: { // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. @@ -34037,8 +34605,8 @@ return true; KnownBits KnownVec; - if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, - KnownVec, TLO, Depth + 1)) + if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, KnownVec, + TLO, Depth + 1)) return true; Known = KnownVec.zext(BitWidth, true); @@ -34146,8 +34714,9 @@ /// folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. -static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { +static SDValue +XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -34219,7 +34788,7 @@ LoadSDNode *LN0 = cast(LdNode); - if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); // If there's a bitcast before the shuffle, check if the load type and @@ -34238,8 +34807,8 @@ // Create shuffle node taking into account the case that its a unary shuffle SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1]; - Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, - ShuffleMask); + Shuffle = + DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, ShuffleMask); Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); @@ -34375,8 +34944,7 @@ // Convert a vXi1 constant build vector to the same width scalar integer. static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { EVT SrcVT = Op.getValueType(); - assert(SrcVT.getVectorElementType() == MVT::i1 && - "Expected a vXi1 vector"); + assert(SrcVT.getVectorElementType() == MVT::i1 && "Expected a vXi1 vector"); assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && "Expected a constant build vector"); @@ -34410,8 +34978,7 @@ return SDValue(); // Look for logic ops. 
- if (Op.getOpcode() != ISD::AND && - Op.getOpcode() != ISD::OR && + if (Op.getOpcode() != ISD::AND && Op.getOpcode() != ISD::OR && Op.getOpcode() != ISD::XOR) return SDValue(); @@ -34586,7 +35153,7 @@ bool LowUndef = true, AllUndefOrZero = true; for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { SDValue Op = N0.getOperand(i); - LowUndef &= Op.isUndef() || (i >= e/2); + LowUndef &= Op.isUndef() || (i >= e / 2); AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); } if (AllUndefOrZero) { @@ -34628,15 +35195,14 @@ // Try to remove a bitcast of constant vXi1 vector. We have to legalize // most of these to scalar anyway. - if (Subtarget.hasAVX512() && VT.isScalarInteger() && - SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && + if (Subtarget.hasAVX512() && VT.isScalarInteger() && SrcVT.isVector() && + SrcVT.getVectorElementType() == MVT::i1 && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { return combinevXi1ConstantToInteger(N0, DAG); } - if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && - VT.isVector() && VT.getVectorElementType() == MVT::i1 && - isa(N0)) { + if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() && + VT.getVectorElementType() == MVT::i1 && isa(N0)) { auto *C = cast(N0); if (C->isAllOnesValue()) return DAG.getConstant(1, SDLoc(N0), VT); @@ -34656,10 +35222,17 @@ // transferring the SSE operand to integer register and back. unsigned FPOpcode; switch (N0.getOpcode()) { - case ISD::AND: FPOpcode = X86ISD::FAND; break; - case ISD::OR: FPOpcode = X86ISD::FOR; break; - case ISD::XOR: FPOpcode = X86ISD::FXOR; break; - default: return SDValue(); + case ISD::AND: + FPOpcode = X86ISD::FAND; + break; + case ISD::OR: + FPOpcode = X86ISD::FOR; + break; + case ISD::XOR: + FPOpcode = X86ISD::FXOR; + break; + default: + return SDValue(); } if (!((Subtarget.hasSSE1() && VT == MVT::f32) || @@ -34735,7 +35308,7 @@ return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); }; MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64); - return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 }, + return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {SadOp0, SadOp1}, PSADBWBuilder); } @@ -34817,7 +35390,8 @@ DAG.getIntPtrConstant(0, DL)); } -// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK. +// Attempt to replace an all_of/any_of/parity style horizontal reduction with a +// MOVMSK. static SDValue combineHorizontalPredicateResult(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -34980,8 +35554,8 @@ // (extends the sign bit which is zero). // So it is correct to skip the sign/zero extend instruction. 
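combineHorizontalPredicateResult, declared above, rewrites all_of/any_of (and parity) reductions over vector compare results into a single MOVMSK plus a scalar test instead of a tree of vector operations. At the intrinsics level, the shape it is aiming for looks roughly like this (a sketch, not code from the patch):

  #include <emmintrin.h>

  // all_of / any_of over 16 byte-wide compare results.
  static int all_bytes_equal(__m128i a, __m128i b) {
    int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(a, b));  // one bit per lane
    return mask == 0xFFFF;                               // all_of
  }

  static int any_byte_equal(__m128i a, __m128i b) {
    return _mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) != 0; // any_of
  }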
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || - Root.getOpcode() == ISD::ZERO_EXTEND || - Root.getOpcode() == ISD::ANY_EXTEND)) + Root.getOpcode() == ISD::ZERO_EXTEND || + Root.getOpcode() == ISD::ANY_EXTEND)) Root = Root.getOperand(0); // If there was a match, we want Root to be a select that is the root of an @@ -35005,9 +35579,9 @@ if (Stages > 3) { unsigned SadElems = SadVT.getVectorNumElements(); - for(unsigned i = Stages - 3; i > 0; --i) { + for (unsigned i = Stages - 3; i > 0; --i) { SmallVector Mask(SadElems, -1); - for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) + for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) Mask[j] = MaskEnd + j; SDValue Shuffle = @@ -35178,15 +35752,15 @@ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Vec.getOperand(0).getValueType().getScalarType(), Vec.getOperand(0), Index); - SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - Vec.getOperand(1), Index); - SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - Vec.getOperand(2), Index); + SDValue Ext1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(1), Index); + SDValue Ext2 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(2), Index); return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2); } // TODO: This switch could include FNEG and the x86-specific FP logic ops - // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid + // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid // missed load folding and fma+fneg combining. switch (Vec.getOpcode()) { case ISD::FMA: // Begin 3 operands @@ -35330,8 +35904,7 @@ } return false; }; - if (all_of(InputVector->uses(), IsBoolExtract) && - BoolExtracts.size() > 1) { + if (all_of(InputVector->uses(), IsBoolExtract) && BoolExtracts.size() > 1) { unsigned NumSrcElts = SrcVT.getVectorNumElements(); EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts); if (SDValue BC = @@ -35526,11 +36099,10 @@ /// This function will also call SimplifyDemandedBits on already created /// BLENDV to perform additional simplifications. static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); - if ((N->getOpcode() != ISD::VSELECT && - N->getOpcode() != X86ISD::BLENDV) || + if ((N->getOpcode() != ISD::VSELECT && N->getOpcode() != X86ISD::BLENDV) || ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); @@ -35593,8 +36165,8 @@ if (U->getOpcode() == X86ISD::BLENDV) continue; - SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), - Cond, U->getOperand(1), U->getOperand(2)); + SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), Cond, + U->getOperand(1), U->getOperand(2)); DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB); DCI.AddToWorklist(U); } @@ -35645,7 +36217,8 @@ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { - default: break; + default: + break; case ISD::SETULT: // Converting this to a min would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive @@ -35710,11 +36283,12 @@ Opcode = X86ISD::FMAX; break; } - // Check for x CC y ? y : x -- a min/max with reversed arms. + // Check for x CC y ? y : x -- a min/max with reversed arms. 
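The FMIN/FMAX select matching above (continuing just below with the reversed-arm cases) is guarded the way it is because SSE's MINSS/MAXSS are not IEEE min/max: when either input is NaN, or when comparing +0.0 with -0.0, they simply return the second source operand. That is why several cases are only taken under no-NaNs, no-signed-zeros, or unsafe-math conditions. The scalar behaviour being matched is, roughly:

  // What minss computes: the second operand wins on NaN and on +0.0 vs -0.0.
  static float sse_minss(float a, float b) {
    return a < b ? a : b;   // comparison is false for NaN, so b is returned
  }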
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { - default: break; + default: + break; case ISD::SETOGE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would @@ -35877,12 +36451,13 @@ DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); switch (CC) { - default: break; + default: + break; case ISD::SETLT: case ISD::SETGT: { ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE; - Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), - Cond.getOperand(0), Cond.getOperand(1), NewCC); + Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), + Cond.getOperand(1), NewCC); return DAG.getSelect(DL, VT, Cond, LHS, RHS); } } @@ -36160,7 +36735,7 @@ SDValue Op2 = Cmp.getOperand(1); SDValue SetCC; - const ConstantSDNode* C = nullptr; + const ConstantSDNode *C = nullptr; bool needOppositeCond = (CC == X86::COND_E); bool checkAgainstTrue = false; // Is it a comparison against 1? @@ -36181,8 +36756,7 @@ bool truncatedToBoolWithAnd = false; // Skip (zext $x), (trunc $x), or (and $x, 1) node. while (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE || - SetCC.getOpcode() == ISD::AND) { + SetCC.getOpcode() == ISD::TRUNCATE || SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; if (isOneConstant(SetCC.getOperand(0))) @@ -36225,13 +36799,13 @@ if (!FVal) { SDValue Op = SetCC.getOperand(0); // Skip 'zext' or 'trunc' node. - if (Op.getOpcode() == ISD::ZERO_EXTEND || - Op.getOpcode() == ISD::TRUNCATE) + if (Op.getOpcode() == ISD::ZERO_EXTEND || Op.getOpcode() == ISD::TRUNCATE) Op = Op.getOperand(0); // A special case for rdrand/rdseed, where 0 is set if false cond is // found. if ((Op.getOpcode() != X86ISD::RDRAND && - Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0) + Op.getOpcode() != X86ISD::RDSEED) || + Op.getResNo() != 0) return SDValue(); } // Quit if false value is not the constant 0 or 1. @@ -36276,7 +36850,8 @@ SDValue SetCC0, SetCC1; switch (Cond->getOpcode()) { - default: return false; + default: + return false; case ISD::AND: case X86ISD::AND: isAnd = true; @@ -36307,12 +36882,12 @@ if (EFLAGS.getOpcode() == X86ISD::ADD) { if (isAllOnesConstant(EFLAGS.getOperand(1))) { SDValue Carry = EFLAGS.getOperand(0); - while (Carry.getOpcode() == ISD::TRUNCATE || - Carry.getOpcode() == ISD::ZERO_EXTEND || - Carry.getOpcode() == ISD::SIGN_EXTEND || - Carry.getOpcode() == ISD::ANY_EXTEND || - (Carry.getOpcode() == ISD::AND && - isOneConstant(Carry.getOperand(1)))) + while ( + Carry.getOpcode() == ISD::TRUNCATE || + Carry.getOpcode() == ISD::ZERO_EXTEND || + Carry.getOpcode() == ISD::SIGN_EXTEND || + Carry.getOpcode() == ISD::ANY_EXTEND || + (Carry.getOpcode() == ISD::AND && isOneConstant(Carry.getOperand(1)))) Carry = Carry.getOperand(0); if (Carry.getOpcode() == X86ISD::SETCC || Carry.getOpcode() == X86ISD::SETCC_CARRY) { @@ -36340,8 +36915,7 @@ } // If this is a check of the z flag of an add with 1, switch to the // C flag. 
- if (CarryCC == X86::COND_E && - CarryOp1.getOpcode() == X86ISD::ADD && + if (CarryCC == X86::COND_E && CarryOp1.getOpcode() == X86ISD::ADD && isOneConstant(CarryOp1.getOperand(1))) return CarryOp1; } @@ -36382,7 +36956,7 @@ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), - Flags}; + Flags}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36417,12 +36991,11 @@ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient // for any integer data type, including i8/i16. - if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { + if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) { Cond = getSETCC(CC, Cond, DL, DAG); // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, - FalseC->getValueType(0), Cond); + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; @@ -36431,31 +37004,33 @@ // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + Diff = (unsigned)Diff; bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { - default: break; - case 1: // result = add base, cond - case 2: // result = lea base( , cond*2) - case 3: // result = lea base(cond, cond*2) - case 4: // result = lea base( , cond*4) - case 5: // result = lea base(cond, cond*4) - case 8: // result = lea base( , cond*8) - case 9: // result = lea base(cond, cond*8) + default: + break; + case 1: // result = add base, cond + case 2: // result = lea base( , cond*2) + case 3: // result = lea base(cond, cond*2) + case 4: // result = lea base( , cond*4) + case 5: // result = lea base(cond, cond*4) + case 8: // result = lea base( , cond*8) + case 9: // result = lea base(cond, cond*8) isFastMultiplier = true; break; } } if (isFastMultiplier) { - APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); - Cond = getSETCC(CC, Cond, DL ,DAG); + APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); + Cond = getSETCC(CC, Cond, DL, DAG); // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), - Cond); + Cond = + DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. 
if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, @@ -36500,10 +37075,9 @@ std::swap(TrueOp, FalseOp); } - if (CC == X86::COND_E && - CmpAgainst == dyn_cast(TrueOp)) { - SDValue Ops[] = { FalseOp, Cond.getOperand(0), - DAG.getConstant(CC, DL, MVT::i8), Cond }; + if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { + SDValue Ops[] = {FalseOp, Cond.getOperand(0), + DAG.getConstant(CC, DL, MVT::i8), Cond}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36538,7 +37112,7 @@ } SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), - Flags}; + Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); @@ -36572,9 +37146,9 @@ EVT VT = N->getValueType(0); // This should constant fold. SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); - SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), - DAG.getConstant(X86::COND_NE, DL, MVT::i8), - Cond); + SDValue CMov = + DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), + DAG.getConstant(X86::COND_NE, DL, MVT::i8), Cond); return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); } } @@ -36890,8 +37464,7 @@ return SDValue(); APInt Mask17 = APInt::getHighBitsSet(32, 17); - if (!DAG.MaskedValueIsZero(N1, Mask17) || - !DAG.MaskedValueIsZero(N0, Mask17)) + if (!DAG.MaskedValueIsZero(N1, Mask17) || !DAG.MaskedValueIsZero(N0, Mask17)) return SDValue(); // Use SplitOpsAndApply to handle AVX splitting. @@ -36901,7 +37474,7 @@ return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) }, + {DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1)}, PMADDWDBuilder); } @@ -36929,8 +37502,8 @@ ArrayRef Ops) { return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops); }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, - PMULDQBuilder, /*CheckBWI*/false); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1}, + PMULDQBuilder, /*CheckBWI*/ false); } // If the upper bits are zero we can use a single pmuludq. @@ -36940,8 +37513,8 @@ ArrayRef Ops) { return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops); }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, - PMULUDQBuilder, /*CheckBWI*/false); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1}, + PMULUDQBuilder, /*CheckBWI*/ false); } return SDValue(); @@ -36990,8 +37563,8 @@ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(AbsMulAmt, DL, VT)); if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); return NewMul; } @@ -37015,9 +37588,8 @@ (isPowerOf2_64(MulAmt2) || (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) { - if (isPowerOf2_64(MulAmt2) && - !(SignMulAmt >= 0 && N->hasOneUse() && - N->use_begin()->getOpcode() == ISD::ADD)) + if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::ADD)) // If second multiplifer is pow2, issue it first. We want the multiply by // 3, 5, or 9 to be folded into the addressing mode unless the lone use // is an add. 
Only do this for positive multiply amounts since the @@ -37040,8 +37612,8 @@ // Negate the result. if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (!Subtarget.slowLEA()) NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); @@ -37055,17 +37627,16 @@ NewMul = DAG.getNode( ISD::ADD, DL, VT, N->getOperand(0), DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, - MVT::i8))); + DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, MVT::i8))); // To negate, subtract the number from zero if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (isPowerOf2_64(AbsMulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt + 1), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt + 1), DL, MVT::i8)); // To negate, reverse the operands of the subtract. if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul); @@ -37073,16 +37644,16 @@ NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) { // (mul x, 2^N + 2) => (add (add (shl x, N), x), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt - 2), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) { // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt + 2), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt + 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } @@ -37099,8 +37670,7 @@ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) // since the result of setcc_c is all zero's or all ones. - if (VT.isInteger() && !VT.isVector() && - N1C && N0.getOpcode() == ISD::AND && + if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = N0.getConstantOperandAPInt(1); @@ -37181,7 +37751,7 @@ if (SarConst.isNegative()) return SDValue(); - for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { + for (MVT SVT : {MVT::i8, MVT::i16, MVT::i32}) { unsigned ShiftSize = SVT.getSizeInBits(); // skipping types without corresponding sext/zext and // ShlConst that is not one of [56,48,32,24,16] @@ -37327,8 +37897,8 @@ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular // truncate to create a larger truncate. 
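The combineMul cases above strength-reduce multiplications by "almost a power of two" constants into a shift plus one or two adds/subs, negating at the end for negative amounts. Spelled out as plain arithmetic (a sketch of the algebra, not the DAG code):

  // mul x, 2^N + 1  ->  (x << N) + x         e.g. x * 9  == (x << 3) + x
  // mul x, 2^N - 1  ->  (x << N) - x         e.g. x * 7  == (x << 3) - x
  // mul x, 2^N + 2  ->  ((x << N) + x) + x   e.g. x * 10 == (x << 3) + x + x
  // mul x, 2^N - 2  ->  ((x << N) - x) - x   e.g. x * 6  == (x << 3) - x - x
  static unsigned long long mul_by_9(unsigned long long x) { return (x << 3) + x; }
  static unsigned long long mul_by_7(unsigned long long x) { return (x << 3) - x; }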
- if (Subtarget.hasAVX512() && - N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && + if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE && + N1.isUndef() && VT == MVT::v16i8 && N0.getOperand(0).getValueType() == MVT::v8i32) { if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) || (!IsSigned && @@ -37508,7 +38078,7 @@ SDValue CMP00 = CMP0->getOperand(0); SDValue CMP01 = CMP0->getOperand(1); - EVT VT = CMP00.getValueType(); + EVT VT = CMP00.getValueType(); if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; @@ -37530,8 +38100,10 @@ } if (!ExpectingFlags) { - enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); - enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); + enum X86::CondCode cc0 = + (enum X86::CondCode)N0.getConstantOperandVal(0); + enum X86::CondCode cc1 = + (enum X86::CondCode)N1.getConstantOperandVal(0); if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { X86::CondCode tmp = cc0; @@ -37539,7 +38111,7 @@ cc1 = tmp; } - if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || + if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). @@ -37549,17 +38121,17 @@ DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes - // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. + // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee + // that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, DAG.getConstant(0, DL, MVT::v16i1), FSetCC, DAG.getIntPtrConstant(0, DL)); return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } - SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, - CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, DL, - MVT::i8)); + SDValue OnesOrZeroesF = + DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, + CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -37570,8 +38142,8 @@ // OnesOrZeroesF is all ones of all zeroes, we don't need all the // bits, but can do this little dance to extract the lowest 32 bits // and work with those going forward. 
- SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, - OnesOrZeroesF); + SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, + MVT::v2f64, OnesOrZeroesF); SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); @@ -37581,8 +38153,8 @@ SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); - SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - ANDed); + SDValue OneBitOfTruth = + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } } @@ -37648,18 +38220,18 @@ assert((N->getOpcode() == ISD::ANY_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); + N->getOpcode() == ISD::SIGN_EXTEND) && + "Invalid Node"); SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow.getValueType(); - if (Narrow->getOpcode() != ISD::XOR && - Narrow->getOpcode() != ISD::AND && + if (Narrow->getOpcode() != ISD::XOR && Narrow->getOpcode() != ISD::AND && Narrow->getOpcode() != ISD::OR) return SDValue(); - SDValue N0 = Narrow->getOperand(0); - SDValue N1 = Narrow->getOperand(1); + SDValue N0 = Narrow->getOperand(0); + SDValue N1 = Narrow->getOperand(1); SDLoc DL(Narrow); // The Left side has to be a trunc. @@ -37671,10 +38243,9 @@ return SDValue(); // The right side has to be a 'trunc' or a constant vector. - bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getValueType() == VT; - if (!RHSTrunc && - !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) + bool RHSTrunc = + N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getValueType() == VT; + if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -37693,14 +38264,15 @@ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); + default: + llvm_unreachable("Unexpected opcode"); case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType()); case ISD::SIGN_EXTEND: - return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, - Op, DAG.getValueType(NarrowVT)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, + DAG.getValueType(NarrowVT)); } } @@ -37723,17 +38295,23 @@ EVT N10Type = N10.getValueType(); // Ensure that both types are the same and are legal scalar fp types. 
- if (N00Type != N10Type || - !((Subtarget.hasSSE1() && N00Type == MVT::f32) || - (Subtarget.hasSSE2() && N00Type == MVT::f64))) + if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) || + (Subtarget.hasSSE2() && N00Type == MVT::f64))) return SDValue(); unsigned FPOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected input node for FP logic conversion"); - case ISD::AND: FPOpcode = X86ISD::FAND; break; - case ISD::OR: FPOpcode = X86ISD::FOR; break; - case ISD::XOR: FPOpcode = X86ISD::FXOR; break; + default: + llvm_unreachable("Unexpected input node for FP logic conversion"); + case ISD::AND: + FPOpcode = X86ISD::FAND; + break; + case ISD::OR: + FPOpcode = X86ISD::FOR; + break; + case ISD::XOR: + FPOpcode = X86ISD::FXOR; + break; } SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); @@ -37793,15 +38371,17 @@ return SDValue(); return ShiftedIndex.getOperand(0); - } static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { if (Subtarget.hasBMI2() && VT.isScalarInteger()) { switch (VT.getSizeInBits()) { - default: return false; - case 64: return Subtarget.is64Bit() ? true : false; - case 32: return true; + default: + return false; + case 64: + return Subtarget.is64Bit() ? true : false; + case 32: + return true; } } return false; @@ -37831,7 +38411,7 @@ SDValue N = Node->getOperand(i); LoadSDNode *Ld = dyn_cast(N.getNode()); - // continue if the operand is not a load instruction + // continue if the operand is not a load instruction if (!Ld) return SDValue(); @@ -37927,9 +38507,9 @@ // If this is 64-bit, its always best to xor the two 32-bit pieces together // even if we have popcnt. if (VT == MVT::i64) { - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(32, DL, MVT::i8))); + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(32, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); // Generate a 32-bit parity idiom. This will bring us back here if we need @@ -37942,16 +38522,16 @@ assert(VT == MVT::i32 && "Unexpected VT!"); // Xor the high and low 16-bits together using a 32-bit operation. - SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(16, DL, MVT::i8)); + SDValue Hi16 = + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(16, DL, MVT::i8)); X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. // This should allow an h-reg to be used to save a shift. // FIXME: We only get an h-reg in 32-bit mode. 
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(8, DL, MVT::i8))); + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i8, + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(8, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); @@ -38603,15 +39183,25 @@ return SDValue(); switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); + default: + return SDValue(); case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break; - case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break; + case MVT::v4i32: + if (!Subtarget.hasSSE2()) + return SDValue(); + break; + case MVT::v2i64: + if (!Subtarget.hasSSE42()) + return SDValue(); + break; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: - case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break; + case MVT::v4i64: + if (!Subtarget.hasAVX2()) + return SDValue(); + break; } // There must be a shift right algebraic before the xor, and the xor must be a @@ -38799,8 +39389,7 @@ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); } if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !Subtarget.hasAVX512() && - (SVT == MVT::i8 || SVT == MVT::i16) && + !Subtarget.hasAVX512() && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). @@ -38835,8 +39424,8 @@ unsigned NumElems = VT.getVectorNumElements(); EVT ScalarVT = VT.getVectorElementType(); - if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && - NumElems >= 2 && isPowerOf2_32(NumElems))) + if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2 && + isPowerOf2_32(NumElems))) return SDValue(); // InScalarVT is the intermediate type in AVG pattern and it should be greater @@ -38907,7 +39496,7 @@ Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes); Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); return SplitOpsAndApply(DAG, Subtarget, DL, VT, - { Operands[0].getOperand(0), Operands[1] }, + {Operands[0].getOperand(0), Operands[1]}, AVGBuilder); } @@ -38976,13 +39565,13 @@ // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. 
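The parity idiom emitted above never needs a full popcount: XOR-folding preserves parity, so the value is folded 64 -> 32 -> 16 -> 8 bits and the last XOR is done as a flag-setting 8-bit operation, because x86's parity flag only reflects the low byte of a result (the DAG then reads it via SETNP). The same folding in portable C, with __builtin_parity standing in for the PF read:

  #include <stdint.h>

  static unsigned parity64(uint64_t x) {
    uint32_t lo = (uint32_t)x ^ (uint32_t)(x >> 32);  // i64: xor the halves
    lo ^= lo >> 16;                                   // fold 32 -> 16 bits
    uint8_t b = (uint8_t)(lo ^ (lo >> 8));            // final 8-bit xor
    return __builtin_parity(b);                       // SETNP on that xor
  }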
ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; - unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast))) { + *Ld->getMemOperand(), &Fast) && + !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -38990,19 +39579,17 @@ SDValue Ptr = Ld->getBasePtr(); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - NumElems/2); + NumElems / 2); SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Alignment, Ld->getMemOperand()->getFlags()); Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); - SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(16), - MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); + SDValue Load2 = DAG.getLoad( + HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Load1.getValue(1), - Load2.getValue(1)); + Load1.getValue(1), Load2.getValue(1)); SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); @@ -39135,8 +39722,8 @@ if (LoadFirstElt && LoadLastElt) { SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMemOperand()); - SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, - ML->getPassThru()); + SDValue Blend = + DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getPassThru()); return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true); } @@ -39154,12 +39741,11 @@ // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. - SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), - ML->getMask(), DAG.getUNDEF(VT), - ML->getMemoryVT(), ML->getMemOperand(), - ML->getExtensionType()); - SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, - ML->getPassThru()); + SDValue NewML = DAG.getMaskedLoad( + VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMask(), DAG.getUNDEF(VT), + ML->getMemoryVT(), ML->getMemOperand(), ML->getExtensionType()); + SDValue Blend = + DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); return DCI.CombineTo(ML, Blend, NewML.getValue(1), true); } @@ -39195,15 +39781,15 @@ unsigned ToSz = VT.getScalarSizeInBits(); unsigned FromSz = LdVT.getScalarSizeInBits(); // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for extending masked load"); + assert(isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for extending masked load"); - unsigned SizeRatio = ToSz / FromSz; + unsigned SizeRatio = ToSz / FromSz; assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - LdVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), LdVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert PassThru value. 
@@ -39217,7 +39803,7 @@ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru, - DAG.getUNDEF(WideVecVT), ShuffleVec); + DAG.getUNDEF(WideVecVT), ShuffleVec); } // Prepare the new mask. @@ -39231,15 +39817,13 @@ ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) ShuffleVec[i] = NumElems * SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); + NewMask = DAG.getVectorShuffle( + WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; + unsigned WidenNumElts = NumElems * SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); @@ -39248,10 +39832,9 @@ NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } - SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), - Mld->getBasePtr(), NewMask, WidePassThru, - Mld->getMemoryVT(), Mld->getMemOperand(), - ISD::NON_EXTLOAD); + SDValue WideLd = DAG.getMaskedLoad( + WideVecVT, dl, Mld->getChain(), Mld->getBasePtr(), NewMask, WidePassThru, + Mld->getMemoryVT(), Mld->getMemOperand(), ISD::NON_EXTLOAD); SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd); SmallVector ShuffleVec(NumElems * SizeRatio, -1); @@ -39287,8 +39870,8 @@ SDLoc DL(MS); EVT VT = MS->getValue().getValueType(); EVT EltVT = VT.getVectorElementType(); - SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - MS->getValue(), VecIndex); + SDValue Extract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, MS->getValue(), VecIndex); // Store that element at the appropriate offset from the base pointer. return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(), @@ -39329,8 +39912,8 @@ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), Mst->getMemoryVT())) { return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); + Mst->getBasePtr(), Mask, Mst->getMemoryVT(), + Mst->getMemOperand(), true); } return SDValue(); @@ -39351,19 +39934,19 @@ return SDValue(); // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for truncating masked store"); + assert(isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for truncating masked store"); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. - assert (((NumElems * FromSz) % ToSz) == 0 && - "Unexpected ratio for truncating masked store"); + assert(((NumElems * FromSz) % ToSz) == 0 && + "Unexpected ratio for truncating masked store"); - unsigned SizeRatio = FromSz / ToSz; + unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. 
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); @@ -39376,9 +39959,8 @@ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); - SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); + SDValue TruncatedVal = DAG.getVectorShuffle( + WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), ShuffleVec); SDValue NewMask; SDValue Mask = Mst->getMask(); @@ -39387,17 +39969,15 @@ NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); + for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) + ShuffleVec[i] = NumElems * SizeRatio; + NewMask = DAG.getVectorShuffle( + WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; + unsigned WidenNumElts = NumElems * SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); @@ -39456,15 +40036,16 @@ // Turn vXi1 stores of constants into a scalar store. if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || - VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && + VT == MVT::v64i1) && + VT == StVT && TLI.isTypeLegal(VT) && ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) { // If its a v64i1 store without 64-bit support, we need two stores. if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { - SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl, - StoredVal->ops().slice(0, 32)); + SDValue Lo = + DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(0, 32)); Lo = combinevXi1ConstantToInteger(Lo, DAG); - SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl, - StoredVal->ops().slice(32, 32)); + SDValue Hi = + DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(32, 32)); Hi = combinevXi1ConstantToInteger(Hi, DAG); unsigned Alignment = St->getAlignment(); @@ -39475,11 +40056,9 @@ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); - SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Hi, Ptr1, - St->getPointerInfo().getWithOffset(4), - MinAlign(Alignment, 4U), - St->getMemOperand()->getFlags()); + SDValue Ch1 = DAG.getStore( + St->getChain(), dl, Hi, Ptr1, St->getPointerInfo().getWithOffset(4), + MinAlign(Alignment, 4U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -39492,11 +40071,9 @@ // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. 
bool Fast; - unsigned AddressSpace = St->getAddressSpace(); - unsigned Alignment = St->getAlignment(); if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - AddressSpace, Alignment, &Fast) && + *St->getMemOperand(), &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) @@ -39518,17 +40095,16 @@ St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, - TLI)) - return EmitTruncSStore(true /* Signed saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); + if (SDValue Val = detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), + Subtarget, TLI)) + return EmitTruncSStore(true /* Signed saturation */, St->getChain(), dl, + Val, St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand(), DAG); if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), DAG, dl, Subtarget, TLI)) return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); + dl, Val, St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand(), DAG); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -39543,18 +40119,20 @@ return SDValue(); // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); + if (!isPowerOf2_32(NumElems * FromSz * ToSz)) + return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromSz) % ToSz) return SDValue(); + if (0 != (NumElems * FromSz) % ToSz) + return SDValue(); - unsigned SizeRatio = FromSz / ToSz; + unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); @@ -39568,8 +40146,7 @@ return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); + DAG.getUNDEF(WideVecVT), ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. @@ -39586,18 +40163,19 @@ StoreType = MVT::f64; // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); + EVT StoreVecVT = + EVT::getVectorVT(*DAG.getContext(), StoreType, + VT.getSizeInBits() / StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector Chains; SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. 
- for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - StoreType, ShuffWide, - DAG.getIntPtrConstant(i, dl)); + for (unsigned i = 0, e = (ToSz * NumElems) / StoreType.getSizeInBits(); + i != e; ++i) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, + ShuffWide, DAG.getIntPtrConstant(i, dl)); SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); @@ -39644,8 +40222,9 @@ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget.is64Bit() || F64IsLegal) { - MVT LdVT = (Subtarget.is64Bit() && - (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64; + MVT LdVT = (Subtarget.is64Bit() && (!VT.isFloatingPoint() || !F64IsLegal)) + ? MVT::i64 + : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); @@ -40122,8 +40701,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // First instruction should be a right shift of a multiply. - if (Src.getOpcode() != ISD::SRL || - Src.getOperand(0).getOpcode() != ISD::MUL) + if (Src.getOpcode() != ISD::SRL || Src.getOperand(0).getOpcode() != ISD::MUL) return SDValue(); if (!Subtarget.hasSSE2()) @@ -40176,8 +40754,7 @@ // (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))), // (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B)))))))) static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - const SDLoc &DL) { + const X86Subtarget &Subtarget, const SDLoc &DL) { if (!VT.isVector() || !Subtarget.hasSSSE3()) return SDValue(); @@ -40273,8 +40850,8 @@ std::swap(IdxN01, IdxN11); } // N0 indices be the even element. N1 indices must be the next odd element. - if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || - IdxN01 != 2 * i || IdxN11 != 2 * i + 1) + if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || + IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); @@ -40285,8 +40862,8 @@ ZExtIn = N00In; SExtIn = N01In; } - if (ZExtIn != N00In || SExtIn != N01In || - ZExtIn != N10In || SExtIn != N11In) + if (ZExtIn != N00In || SExtIn != N01In || ZExtIn != N10In || + SExtIn != N11In) return SDValue(); } @@ -40295,14 +40872,13 @@ // Shrink by adding truncate nodes and let DAGCombine fold with the // sources. 
EVT InVT = Ops[0].getValueType(); - assert(InVT.getScalarType() == MVT::i8 && - "Unexpected scalar element type"); + assert(InVT.getScalarType() == MVT::i8 && "Unexpected scalar element type"); assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, InVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]); }; - return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, + return SplitOpsAndApply(DAG, Subtarget, DL, VT, {ZExtIn, SExtIn}, PMADDBuilder); } @@ -40451,22 +41027,38 @@ unsigned NewOpcode = 0; if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) { switch (Arg.getOpcode()) { - case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; - case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break; - // We can't handle scalar intrinsic node here because it would only - // invert one element and not the whole vector. But we could try to handle - // a negation of the lower element only. + case ISD::FMA: + NewOpcode = X86ISD::FNMSUB; + break; + case X86ISD::FMSUB: + NewOpcode = X86ISD::FNMADD; + break; + case X86ISD::FNMADD: + NewOpcode = X86ISD::FMSUB; + break; + case X86ISD::FNMSUB: + NewOpcode = ISD::FMA; + break; + case X86ISD::FMADD_RND: + NewOpcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FMSUB_RND: + NewOpcode = X86ISD::FNMADD_RND; + break; + case X86ISD::FNMADD_RND: + NewOpcode = X86ISD::FMSUB_RND; + break; + case X86ISD::FNMSUB_RND: + NewOpcode = X86ISD::FMADD_RND; + break; + // We can't handle scalar intrinsic node here because it would only + // invert one element and not the whole vector. But we could try to handle + // a negation of the lower element only. } } if (NewOpcode) - return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, - Arg.getNode()->ops())); + return DAG.getBitcast(OrigVT, + DAG.getNode(NewOpcode, DL, VT, Arg.getNode()->ops())); return SDValue(); } @@ -40488,17 +41080,25 @@ SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); unsigned IntOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected FP logic op"); - case X86ISD::FOR: IntOpcode = ISD::OR; break; - case X86ISD::FXOR: IntOpcode = ISD::XOR; break; - case X86ISD::FAND: IntOpcode = ISD::AND; break; - case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + default: + llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: + IntOpcode = ISD::OR; + break; + case X86ISD::FXOR: + IntOpcode = ISD::XOR; + break; + case X86ISD::FAND: + IntOpcode = ISD::AND; + break; + case X86ISD::FANDN: + IntOpcode = X86ISD::ANDNP; + break; } SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); return DAG.getBitcast(VT, IntOp); } - /// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val) static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() != ISD::XOR) @@ -40688,13 +41288,18 @@ // into FMINC and FMAXC, which are Commutative operations. 
unsigned NewOp = 0; switch (N->getOpcode()) { - default: llvm_unreachable("unknown opcode"); - case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; - case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; + default: + llvm_unreachable("unknown opcode"); + case X86ISD::FMIN: + NewOp = X86ISD::FMINC; + break; + case X86ISD::FMAX: + NewOp = X86ISD::FMAXC; + break; } - return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1)); + return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0), + N->getOperand(1)); } static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, @@ -40732,8 +41337,8 @@ if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); - EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - VT); + EVT SetCCType = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // There are 4 possibilities involving NaN inputs, and these are the required // outputs: @@ -40793,8 +41398,8 @@ // Turn ANDNP back to AND if input is inverted. if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) { - return DAG.getNode(ISD::AND, SDLoc(N), VT, - N->getOperand(0).getOperand(0), N->getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N->getOperand(0).getOperand(0), + N->getOperand(1)); } // Attempt to recursively combine a bitmask ANDNP with shuffles. @@ -40902,7 +41507,7 @@ //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND)) { + N0.getOpcode() == ISD::SIGN_EXTEND)) { SDValue N00 = N0.getOperand(0); // EXTLOAD has a better solution on AVX2, @@ -40912,8 +41517,8 @@ return SDValue(); if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { - SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, - N00, N1); + SDValue Tmp = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); } } @@ -41091,8 +41696,7 @@ // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); unsigned Scale = NumElts / EltSizeInBits; - EVT BroadcastVT = - EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); + EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); Vec = DAG.getBitcast(VT, Vec); @@ -41268,8 +41872,8 @@ // Only combine legal element types. EVT SVT = VT.getVectorElementType(); - if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && - SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) + if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && SVT != MVT::i64 && + SVT != MVT::f32 && SVT != MVT::f64) return SDValue(); // We can only do this if the vector size in 256 bits or less. 
@@ -41343,29 +41947,63 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { if (NegMul) { switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FNMADD; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMADD: Opcode = ISD::FMA; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; + default: + llvm_unreachable("Unexpected opcode"); + case ISD::FMA: + Opcode = X86ISD::FNMADD; + break; + case X86ISD::FMADD_RND: + Opcode = X86ISD::FNMADD_RND; + break; + case X86ISD::FMSUB: + Opcode = X86ISD::FNMSUB; + break; + case X86ISD::FMSUB_RND: + Opcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FNMADD: + Opcode = ISD::FMA; + break; + case X86ISD::FNMADD_RND: + Opcode = X86ISD::FMADD_RND; + break; + case X86ISD::FNMSUB: + Opcode = X86ISD::FMSUB; + break; + case X86ISD::FNMSUB_RND: + Opcode = X86ISD::FMSUB_RND; + break; } } if (NegAcc) { switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FMSUB; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FMSUB: Opcode = ISD::FMA; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + default: + llvm_unreachable("Unexpected opcode"); + case ISD::FMA: + Opcode = X86ISD::FMSUB; + break; + case X86ISD::FMADD_RND: + Opcode = X86ISD::FMSUB_RND; + break; + case X86ISD::FMSUB: + Opcode = ISD::FMA; + break; + case X86ISD::FMSUB_RND: + Opcode = X86ISD::FMADD_RND; + break; + case X86ISD::FNMADD: + Opcode = X86ISD::FNMSUB; + break; + case X86ISD::FNMADD_RND: + Opcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FNMSUB: + Opcode = X86ISD::FNMADD; + break; + case X86ISD::FNMSUB_RND: + Opcode = X86ISD::FNMADD_RND; + break; } } @@ -41437,11 +42075,20 @@ unsigned NewOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode!"); - case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break; - case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break; - case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break; - case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break; + default: + llvm_unreachable("Unexpected opcode!"); + case X86ISD::FMADDSUB: + NewOpcode = X86ISD::FMSUBADD; + break; + case X86ISD::FMADDSUB_RND: + NewOpcode = X86ISD::FMSUBADD_RND; + break; + case X86ISD::FMSUBADD: + NewOpcode = X86ISD::FMADDSUB; + break; + case X86ISD::FMSUBADD_RND: + NewOpcode = X86ISD::FMADDSUB_RND; + break; } if (N->getNumOperands() == 4) @@ -41462,8 +42109,7 @@ SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::AND && - N0.hasOneUse() && + if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { @@ -41476,8 +42122,7 @@ } } - if (N0.getOpcode() == ISD::TRUNCATE && - N0.hasOneUse() && + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == 
X86ISD::SETCC_CARRY) { @@ -41570,9 +42215,8 @@ if ((OpSize == 128 && Subtarget.hasSSE2()) || (OpSize == 256 && Subtarget.hasAVX2()) || (OpSize == 512 && Subtarget.useAVX512Regs())) { - EVT VecVT = OpSize == 512 ? MVT::v16i32 : - OpSize == 256 ? MVT::v32i8 : - MVT::v16i8; + EVT VecVT = + OpSize == 512 ? MVT::v16i32 : OpSize == 256 ? MVT::v32i8 : MVT::v16i8; EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT; SDValue Cmp; if (IsOrXorXorCCZero) { @@ -41602,8 +42246,8 @@ // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); - SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL, - MVT::i32); + SDValue FFFFs = + DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL, MVT::i32); return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); } @@ -41679,8 +42323,8 @@ VT.getVectorNumElements() > 4) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16)) { - SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, - N->getOperand(2)); + SDValue Setcc = + DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); } @@ -41769,8 +42413,9 @@ unsigned ScalarSize = Index.getScalarValueSizeInBits(); if (ScalarSize != 32 && ScalarSize != 64) { MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; - EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - Index.getValueType().getVectorNumElements()); + EVT IndexVT = + EVT::getVectorVT(*DAG.getContext(), EltVT, + Index.getValueType().getVectorNumElements()); Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); SmallVector NewOps(N->op_begin(), N->op_end()); NewOps[4] = Index; @@ -41990,7 +42635,7 @@ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { SDNode *User = *UI; X86::CondCode CC; @@ -42011,12 +42656,18 @@ } switch (CC) { - default: break; - case X86::COND_A: case X86::COND_AE: - case X86::COND_B: case X86::COND_BE: - case X86::COND_O: case X86::COND_NO: - case X86::COND_G: case X86::COND_GE: - case X86::COND_L: case X86::COND_LE: + default: + break; + case X86::COND_A: + case X86::COND_AE: + case X86::COND_B: + case X86::COND_BE: + case X86::COND_O: + case X86::COND_NO: + case X86::COND_G: + case X86::COND_GE: + case X86::COND_L: + case X86::COND_LE: return true; } } @@ -42028,7 +42679,7 @@ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { SDNode *User = *UI; unsigned CCOpNo; @@ -42036,10 +42687,18 @@ default: // Be conservative. return false; - case X86ISD::SETCC: CCOpNo = 0; break; - case X86ISD::SETCC_CARRY: CCOpNo = 0; break; - case X86ISD::BRCOND: CCOpNo = 2; break; - case X86ISD::CMOV: CCOpNo = 2; break; + case X86ISD::SETCC: + CCOpNo = 0; + break; + case X86ISD::SETCC_CARRY: + CCOpNo = 0; + break; + case X86ISD::BRCOND: + CCOpNo = 2; + break; + case X86ISD::CMOV: + CCOpNo = 2; + break; } X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); @@ -42097,7 +42756,8 @@ unsigned NewOpc; switch (Op.getOpcode()) { - default: return SDValue(); + default: + return SDValue(); case ISD::AND: // Skip and with constant. We have special handling for and with immediate // during isel to generate test instructions. 
@@ -42105,8 +42765,12 @@ return SDValue(); NewOpc = X86ISD::AND; break; - case ISD::OR: NewOpc = X86ISD::OR; break; - case ISD::XOR: NewOpc = X86ISD::XOR; break; + case ISD::OR: + NewOpc = X86ISD::OR; + break; + case ISD::XOR: + NewOpc = X86ISD::XOR; + break; case ISD::ADD: // If the carry or overflow flag is used, we can't truncate. if (needCarryOrOverflowFlag(SDValue(N, 0))) @@ -42176,9 +42840,8 @@ if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); + return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, N->getOperand(0), + N->getOperand(1), Flags); } // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry) @@ -42199,29 +42862,27 @@ // If the LHS and RHS of the ADC node are zero, then it can't overflow and // the result is either zero or one (depending on the input carry bit). // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. - if (X86::isZeroNode(N->getOperand(0)) && - X86::isZeroNode(N->getOperand(1)) && + if (X86::isZeroNode(N->getOperand(0)) && X86::isZeroNode(N->getOperand(1)) && // We don't have a good way to replace an EFLAGS use, so only do this when // dead right now. SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); - SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, - MVT::i8), - N->getOperand(2)), - DAG.getConstant(1, DL, VT)); + SDValue Res1 = + DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + N->getOperand(2)), + DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); + return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, N->getOperand(0), + N->getOperand(1), Flags); } return SDValue(); @@ -42315,9 +42976,9 @@ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && EFLAGS.getValueType().isInteger() && !isa(EFLAGS.getOperand(1))) { - SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), - EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewSub = + DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(IsSub ? 
X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, @@ -42411,8 +43072,8 @@ return SDValue(); SDLoc DL(N); - EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, - VT.getVectorNumElements()); + EVT ReducedVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements() / 2); @@ -42428,8 +43089,8 @@ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0)); SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1)); - SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, - PMADDWDBuilder); + SDValue Madd = + SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, {N0, N1}, PMADDWDBuilder); // Fill the rest of the output with 0 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, DAG.getConstant(0, DL, MAddVT)); @@ -42517,8 +43178,7 @@ Op0 = BuildPSADBW(SadOp0, SadOp1); // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::ABS && - detectZextAbsDiff(Op1, SadOp0, SadOp1)) { + if (Op1.getOpcode() == ISD::ABS && detectZextAbsDiff(Op1, SadOp0, SadOp1)) { Op1 = BuildPSADBW(SadOp0, SadOp1); } @@ -42655,8 +43315,7 @@ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1])); }; return SplitOpsAndApply(DAG, Subtarget, DL, VT, - { Mul.getOperand(0), Mul.getOperand(1) }, - PMADDBuilder); + {Mul.getOperand(0), Mul.getOperand(1)}, PMADDBuilder); } // Attempt to turn this pattern into PMADDWD. @@ -42742,8 +43401,8 @@ std::swap(IdxN01, IdxN11); } // N0 indices be the even element. N1 indices must be the next odd element. - if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || - IdxN01 != 2 * i || IdxN11 != 2 * i + 1) + if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || + IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); @@ -42776,8 +43435,7 @@ OpVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]); }; - return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 }, - PMADDBuilder); + return SplitOpsAndApply(DAG, Subtarget, DL, VT, {In0, In1}, PMADDBuilder); } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, @@ -42867,8 +43525,8 @@ // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, USUBSATBuilder); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {SubusLHS, SubusRHS}, + USUBSATBuilder); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -42897,9 +43555,8 @@ SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); - SDValue Psubus = - SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); + SDValue Psubus = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, + {NewSubusLHS, NewSubusRHS}, USUBSATBuilder); // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. 
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); @@ -42920,8 +43577,7 @@ isa(Op1.getOperand(1))) { const APInt &XorC = Op1.getConstantOperandAPInt(1); EVT VT = Op0.getValueType(); - SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, - Op1.getOperand(0), + SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), DAG.getConstant(~XorC, SDLoc(Op1), VT)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor, DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT)); @@ -42990,11 +43646,9 @@ // If needed, look through bitcasts to get to the load. if (auto *FirstLd = dyn_cast(peekThroughBitcasts(Op0))) { bool Fast; - unsigned Alignment = FirstLd->getAlignment(); - unsigned AS = FirstLd->getAddressSpace(); const X86TargetLowering *TLI = Subtarget.getTargetLowering(); - if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, AS, - Alignment, &Fast) && + if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *FirstLd->getMemOperand(), &Fast) && Fast) { if (SDValue Ld = EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false)) @@ -43257,9 +43911,9 @@ SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && - TLI.isTypeLegal(WideVecVT) && - WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { - auto isConcatenatedNot = [] (SDValue V) { + TLI.isTypeLegal(WideVecVT) && WideVecVT.getSizeInBits() == 256 && + WideVec.getOpcode() == ISD::AND) { + auto isConcatenatedNot = [](SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) return false; @@ -43428,8 +44082,8 @@ // Combine (ext_invec (ext_invec X)) -> (ext_invec X) const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (In.getOpcode() == N->getOpcode() && - TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType())) + if (In.getOpcode() == N->getOpcode() && TLI.isTypeLegal(VT) && + TLI.isTypeLegal(In.getOperand(0).getValueType())) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0)); // Attempt to combine as a shuffle. 
@@ -43448,7 +44102,8 @@ DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { - default: break; + default: + break; case ISD::SCALAR_TO_VECTOR: return combineScalarToVector(N, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -43463,57 +44118,99 @@ return combineExtractSubvector(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: - case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget); - case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); - case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); - case X86ISD::CMP: return combineCMP(N, DAG); - case ISD::ADD: return combineAdd(N, DAG, Subtarget); - case ISD::SUB: return combineSub(N, DAG, Subtarget); + case X86ISD::BLENDV: + return combineSelect(N, DAG, DCI, Subtarget); + case ISD::BITCAST: + return combineBitcast(N, DAG, DCI, Subtarget); + case X86ISD::CMOV: + return combineCMov(N, DAG, DCI, Subtarget); + case X86ISD::CMP: + return combineCMP(N, DAG); + case ISD::ADD: + return combineAdd(N, DAG, Subtarget); + case ISD::SUB: + return combineSub(N, DAG, Subtarget); case X86ISD::ADD: - case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); - case X86ISD::SBB: return combineSBB(N, DAG); - case X86ISD::ADC: return combineADC(N, DAG, DCI); - case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); - case ISD::SHL: return combineShiftLeft(N, DAG); - case ISD::SRA: return combineShiftRightArithmetic(N, DAG); - case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI); - case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); - case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); - case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); - case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget); - case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); - case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); - case ISD::STORE: return combineStore(N, DAG, Subtarget); - case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); - case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); + case X86ISD::SUB: + return combineX86AddSub(N, DAG, DCI); + case X86ISD::SBB: + return combineSBB(N, DAG); + case X86ISD::ADC: + return combineADC(N, DAG, DCI); + case ISD::MUL: + return combineMul(N, DAG, DCI, Subtarget); + case ISD::SHL: + return combineShiftLeft(N, DAG); + case ISD::SRA: + return combineShiftRightArithmetic(N, DAG); + case ISD::SRL: + return combineShiftRightLogical(N, DAG, DCI); + case ISD::AND: + return combineAnd(N, DAG, DCI, Subtarget); + case ISD::OR: + return combineOr(N, DAG, DCI, Subtarget); + case ISD::XOR: + return combineXor(N, DAG, DCI, Subtarget); + case X86ISD::BEXTR: + return combineBEXTR(N, DAG, DCI, Subtarget); + case ISD::LOAD: + return combineLoad(N, DAG, DCI, Subtarget); + case ISD::MLOAD: + return combineMaskedLoad(N, DAG, DCI, Subtarget); + case ISD::STORE: + return combineStore(N, DAG, Subtarget); + case ISD::MSTORE: + return combineMaskedStore(N, DAG, DCI, Subtarget); + case ISD::SINT_TO_FP: + return combineSIntToFP(N, DAG, Subtarget); + case ISD::UINT_TO_FP: + return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: - case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); - case ISD::FNEG: return combineFneg(N, DAG, Subtarget); - case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); - case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); - case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); - case X86ISD::FANDN: 
return combineFAndn(N, DAG, Subtarget); + case ISD::FSUB: + return combineFaddFsub(N, DAG, Subtarget); + case ISD::FNEG: + return combineFneg(N, DAG, Subtarget); + case ISD::TRUNCATE: + return combineTruncate(N, DAG, Subtarget); + case X86ISD::ANDNP: + return combineAndnp(N, DAG, DCI, Subtarget); + case X86ISD::FAND: + return combineFAnd(N, DAG, Subtarget); + case X86ISD::FANDN: + return combineFAndn(N, DAG, Subtarget); case X86ISD::FXOR: - case X86ISD::FOR: return combineFOr(N, DAG, Subtarget); + case X86ISD::FOR: + return combineFOr(N, DAG, Subtarget); case X86ISD::FMIN: - case X86ISD::FMAX: return combineFMinFMax(N, DAG); + case X86ISD::FMAX: + return combineFMinFMax(N, DAG); case ISD::FMINNUM: - case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget); + case ISD::FMAXNUM: + return combineFMinNumFMaxNum(N, DAG, Subtarget); case X86ISD::CVTSI2P: - case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI); - case X86ISD::BT: return combineBT(N, DAG, DCI); + case X86ISD::CVTUI2P: + return combineX86INT_TO_FP(N, DAG, DCI); + case X86ISD::BT: + return combineBT(N, DAG, DCI); case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget); - case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); - case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); + case ISD::ZERO_EXTEND: + return combineZext(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND: + return combineSext(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: + return combineSignExtendInReg(N, DAG, Subtarget); case ISD::ANY_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget); - case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); - case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); - case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return combineExtInVec(N, DAG, Subtarget); + case ISD::SETCC: + return combineSetCC(N, DAG, Subtarget); + case X86ISD::SETCC: + return combineX86SetCC(N, DAG, Subtarget); + case X86ISD::BRCOND: + return combineBrCond(N, DAG, Subtarget); case X86ISD::PACKSS: - case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget); + case X86ISD::PACKUS: + return combineVectorPack(N, DAG, DCI, Subtarget); case X86ISD::VSHL: case X86ISD::VSRA: case X86ISD::VSRL: @@ -43523,8 +44220,9 @@ case X86ISD::VSRLI: return combineVectorShiftImm(N, DAG, DCI, Subtarget); case X86ISD::PINSRB: - case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); - case X86ISD::SHUFP: // Handle all target specific shuffles + case X86ISD::PINSRW: + return combineVectorInsert(N, DAG, DCI, Subtarget); + case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: case X86ISD::EXTRQI: case X86ISD::INSERTQI: @@ -43556,7 +44254,8 @@ case X86ISD::VPERM2X128: case X86ISD::SHUF128: case X86ISD::VZEXT_MOVL: - case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget); + case ISD::VECTOR_SHUFFLE: + return combineShuffle(N, DAG, DCI, Subtarget); case X86ISD::FMADD_RND: case X86ISD::FMSUB: case X86ISD::FMSUB_RND: @@ -43564,20 +44263,26 @@ case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case ISD::FMA: + return combineFMA(N, DAG, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: - case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); - case X86ISD::MOVMSK: return combineMOVMSK(N, 
DAG, DCI); + case X86ISD::FMSUBADD: + return combineFMADDSUB(N, DAG, Subtarget); + case X86ISD::MOVMSK: + return combineMOVMSK(N, DAG, DCI); case X86ISD::MGATHER: case X86ISD::MSCATTER: case ISD::MGATHER: - case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); + case ISD::MSCATTER: + return combineGatherScatter(N, DAG, DCI, Subtarget); case X86ISD::PCMPEQ: - case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); + case X86ISD::PCMPGT: + return combineVectorCompare(N, DAG, Subtarget); case X86ISD::PMULDQ: - case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI); + case X86ISD::PMULUDQ: + return combinePMULDQ(N, DAG, DCI); } return SDValue(); @@ -43629,7 +44334,7 @@ return true; } -SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, +SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, SelectionDAG &DAG) const { const Module *M = DAG.getMachineFunction().getMMI().getModule(); @@ -43683,7 +44388,8 @@ bool Commute = false; switch (Op.getOpcode()) { - default: return false; + default: + return false; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -43716,8 +44422,7 @@ ((Commute && !isa(N1)) || (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op)))) return false; - if (IsFoldableAtomicRMW(N0, Op) || - (Commute && IsFoldableAtomicRMW(N1, Op))) + if (IsFoldableAtomicRMW(N0, Op) || (Commute && IsFoldableAtomicRMW(N1, Op))) return false; } } @@ -43726,9 +44431,8 @@ return true; } -bool X86TargetLowering:: - isDesirableToCombineBuildVectorToShuffleTruncate( - ArrayRef ShuffleMask, EVT SrcVT, EVT TruncVT) const { +bool X86TargetLowering::isDesirableToCombineBuildVectorToShuffleTruncate( + ArrayRef ShuffleMask, EVT SrcVT, EVT TruncVT) const { assert(SrcVT.getVectorNumElements() == ShuffleMask.size() && "Element count mismatch"); @@ -43800,7 +44504,8 @@ SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { - default: return false; + default: + return false; case 1: // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical @@ -43847,9 +44552,9 @@ if (CI->getType()->isIntegerTy(64)) { InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); - if (Constraints.size() >= 2 && - Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && - Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + if (Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && + Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && + Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && matchAsm(AsmPieces[1], {"bswap", "%edx"}) && @@ -43936,8 +44641,7 @@ default: break; } - } - else if (Constraint.size() == 2) { + } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; @@ -43965,12 +44669,12 @@ /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight - X86TargetLowering::getSingleConstraintMatchWeight( +X86TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. 
+ // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); @@ -44004,36 +44708,37 @@ break; case 'Y': { unsigned Size = StringRef(constraint).size(); - // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y' + // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching + // 'Y' char NextChar = Size == 2 ? constraint[1] : 'i'; if (Size > 2) break; switch (NextChar) { - default: - return CW_Invalid; - // XMM0 - case 'z': - case '0': - if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) - return CW_SpecificReg; - return CW_Invalid; - // Conditional OpMask regs (AVX512) - case 'k': - if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) - return CW_Register; - return CW_Invalid; - // Any MMX reg - case 'm': - if (type->isX86_MMXTy() && Subtarget.hasMMX()) - return weight; + default: + return CW_Invalid; + // XMM0 + case 'z': + case '0': + if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) + return CW_SpecificReg; + return CW_Invalid; + // Conditional OpMask regs (AVX512) + case 'k': + if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) + return CW_Register; + return CW_Invalid; + // Any MMX reg + case 'm': + if (type->isX86_MMXTy() && Subtarget.hasMMX()) + return weight; + return CW_Invalid; + // Any SSE reg when ISA >= SSE2, same as 'Y' + case 'i': + case 't': + case '2': + if (!Subtarget.hasSSE2()) return CW_Invalid; - // Any SSE reg when ISA >= SSE2, same as 'Y' - case 'i': - case 't': - case '2': - if (!Subtarget.hasSSE2()) - return CW_Invalid; - break; + break; } // Fall through (handle "Y" constraint). LLVM_FALLTHROUGH; @@ -44114,8 +44819,7 @@ /// Try to replace an X constraint, which matches anything, with another that /// has more specific requirements based on the type of the corresponding /// operand. -const char *X86TargetLowering:: -LowerXConstraint(EVT ConstraintVT) const { +const char *X86TargetLowering::LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { @@ -44158,16 +44862,18 @@ /// If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector&Ops, + std::vector &Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints for now. - if (Constraint.length() > 1) return; + if (Constraint.length() > 1) + return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { - default: break; + default: + break; case 'I': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 31) { @@ -44241,8 +44947,8 @@ Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); break; } - // FIXME gcc accepts some relocatable values here too, but only in certain - // memory models; it's complicated. + // FIXME gcc accepts some relocatable values here too, but only in certain + // memory models; it's complicated. } return; } @@ -44265,8 +44971,8 @@ if (ConstantSDNode *CST = dyn_cast(Op)) { bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1; BooleanContent BCont = getBooleanContents(MVT::i64); - ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) - : ISD::SIGN_EXTEND; + ISD::NodeType ExtOpc = + IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND; int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? 
CST->getZExtValue() : CST->getSExtValue(); Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64); @@ -44339,7 +45045,8 @@ if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { - default: break; + default: + break; // 'A' means [ER]AX + [ER]DX. case 'A': if (Subtarget.is64Bit()) @@ -44367,7 +45074,7 @@ return std::make_pair(0U, &X86::VK64RegClass); } break; - case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. + case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. if (Subtarget.is64Bit()) { if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32RegClass); @@ -44381,7 +45088,7 @@ } LLVM_FALLTHROUGH; // 32-bit fallthrough - case 'Q': // Q_REGS + case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32_ABCDRegClass); if (VT == MVT::i16) @@ -44391,8 +45098,8 @@ if (VT == MVT::i64) return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; - case 'r': // GENERAL_REGS - case 'l': // INDEX_REGS + case 'r': // GENERAL_REGS + case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) @@ -44400,7 +45107,7 @@ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32RegClass); return std::make_pair(0U, &X86::GR64RegClass); - case 'R': // LEGACY_REGS + case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_NOREXRegClass); if (VT == MVT::i16) @@ -44408,7 +45115,7 @@ if (VT == MVT::i32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32_NOREXRegClass); return std::make_pair(0U, &X86::GR64_NOREXRegClass); - case 'f': // FP Stack registers. + case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) @@ -44416,19 +45123,23 @@ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP64RegClass); return std::make_pair(0U, &X86::RFP80RegClass); - case 'y': // MMX_REGS if MMX allowed. - if (!Subtarget.hasMMX()) break; + case 'y': // MMX_REGS if MMX allowed. + if (!Subtarget.hasMMX()) + break; return std::make_pair(0U, &X86::VR64RegClass); - case 'Y': // SSE_REGS if SSE2 allowed - if (!Subtarget.hasSSE2()) break; + case 'Y': // SSE_REGS if SSE2 allowed + if (!Subtarget.hasSSE2()) + break; LLVM_FALLTHROUGH; case 'v': - case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed - if (!Subtarget.hasSSE1()) break; + case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed + if (!Subtarget.hasSSE1()) + break; bool VConstraint = (Constraint[0] == 'v'); switch (VT.SimpleTy) { - default: break; + default: + break; // Scalar SSE types. case MVT::f32: case MVT::i32: @@ -44467,7 +45178,8 @@ case MVT::v16f32: case MVT::v16i32: case MVT::v8i64: - if (!Subtarget.hasAVX512()) break; + if (!Subtarget.hasAVX512()) + break; if (VConstraint) return std::make_pair(0U, &X86::VR512RegClass); return std::make_pair(0U, &X86::VR512_0_15RegClass); @@ -44483,11 +45195,13 @@ case '2': return getRegForInlineAsmConstraint(TRI, "Y", VT); case 'm': - if (!Subtarget.hasMMX()) break; + if (!Subtarget.hasMMX()) + break; return std::make_pair(0U, &X86::VR64RegClass); case 'z': case '0': - if (!Subtarget.hasSSE1()) break; + if (!Subtarget.hasSSE1()) + break; return std::make_pair(X86::XMM0, &X86::VR128RegClass); case 'k': // This register class doesn't allocate k0 for masked vector operation. 
@@ -44514,7 +45228,7 @@ // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. - std::pair Res; + std::pair Res; Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? @@ -44572,7 +45286,7 @@ // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) - return Res; // Correct type already, nothing to do. + return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should // return "eax". This should even work for things like getting 64bit integer @@ -44584,16 +45298,22 @@ // Therefore, use a helper method. if (isGRClass(*Class)) { unsigned Size = VT.getSizeInBits(); - if (Size == 1) Size = 8; + if (Size == 1) + Size = 8; unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { bool is64Bit = Subtarget.is64Bit(); const TargetRegisterClass *RC = - Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) - : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) - : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) - : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) - : nullptr; + Size == 8 + ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) + : Size == 16 + ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) + : Size == 32 + ? (is64Bit ? &X86::GR32RegClass + : &X86::GR32_NOREXRegClass) + : Size == 64 + ? (is64Bit ? &X86::GR64RegClass : nullptr) + : nullptr; if (Size == 64 && !is64Bit) { // Model GCC's behavior here and select a fixed pair of 32-bit // registers. Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -415,7 +415,8 @@ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(), LD->getAddressSpace(), - LD->getAlignment())) + LD->getAlignment(), + LD->getMemOperand()->getFlags())) return SDValue(); auto &TD = DAG.getDataLayout(); @@ -497,7 +498,8 @@ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(), ST->getAddressSpace(), - ST->getAlignment())) { + ST->getAlignment(), + ST->getMemOperand()->getFlags())) { return SDValue(); } unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( @@ -1792,11 +1794,11 @@ break; case ISD::STORE: { // Replace unaligned store of unaligned load with memmove. - StoreSDNode *ST = cast(N); + StoreSDNode *ST = cast(N); if (!DCI.isBeforeLegalize() || - allowsMisalignedMemoryAccesses(ST->getMemoryVT(), - ST->getAddressSpace(), - ST->getAlignment()) || + allowsMisalignedMemoryAccesses(ST->getMemoryVT(), ST->getAddressSpace(), + ST->getAlignment(), + ST->getMemOperand()->getFlags()) || ST->isVolatile() || ST->isIndexed()) { break; }
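
For readers skimming the X86 and XCore hunks above, the call-site pattern they converge on can be summarized in a small standalone sketch. This is illustrative only and not part of the patch: the helper names wideAccessIsAllowedAndFast and misalignedLoadIsOK are made up here, the surrounding TLI/DAG/load objects are assumed to exist as they do in the combines above, and the sketch assumes it is compiled inside LLVM with the usual CodeGen headers available.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Pattern used by the X86 load/store-splitting combines above: hand the whole
// MachineMemOperand to allowsMemoryAccess() instead of passing the address
// space and alignment separately; the operand also carries the access flags.
static bool wideAccessIsAllowedAndFast(const TargetLowering &TLI,
                                       SelectionDAG &DAG, LoadSDNode *Ld,
                                       EVT VT) {
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, *Ld->getMemOperand(), &Fast);
  return Allowed && Fast;
}

// Pattern used by the XCore lowering and combine paths above, which keep the
// explicit argument list but now forward the memory operand's flags as well.
static bool misalignedLoadIsOK(const TargetLowering &TLI, LoadSDNode *LD) {
  return TLI.allowsMisalignedMemoryAccesses(
      LD->getMemoryVT(), LD->getAddressSpace(), LD->getAlignment(),
      LD->getMemOperand()->getFlags());
}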
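
A brief note on the design choice the updated call sites suggest: where a memory operand is available, passing the whole MachineMemOperand keeps the address space, alignment and flags (volatile, non-temporal and so on) together, so a combine cannot silently drop the flags the way the old (address space, alignment) argument pairs could; where only individual fields are at hand, as in the XCore paths, the flags are threaded through explicitly alongside the existing arguments.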