Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -581,13 +581,17 @@ struct MemCmpExpansionOptions { // The list of available load sizes (in bytes), sorted in decreasing order. SmallVector LoadSizes; + // Set to true to allow overlapping loads. For example, 7-byte compares can + // be done with two 4-byte compares instead of 4+2+1-byte compares. This + // requires all loads in LoadSizes to be doable in an unaligned way. + bool AllowOverlappingLoads = false; }; const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const; /// Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; - /// Enable matching of interleaved access groups that contain predicated + /// Enable matching of interleaved access groups that contain predicated /// accesses or gaps and therefore vectorized using masked /// vector loads/stores. bool enableMaskedInterleavedAccessVectorization() const; @@ -772,7 +776,7 @@ /// \return The cost of a shuffle instruction of kind Kind and of type Tp. /// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds to show the insert/extract point and the type of - /// the subvector being inserted/extracted. + /// the subvector being inserted/extracted. /// NOTE: For subvector extractions Tp represents the source type. 
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, Type *SubTp = nullptr) const; Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -1746,6 +1746,16 @@ return false; } + /// Return true if it is more correct/profitable to use strict FP_TO_INT + /// conversion operations - canonicalizing the FP source value instead of + /// converting all cases and then selecting based on value. + /// This may be true if the target throws exceptions for out of bounds + /// conversions or has fast FP CMOV. + virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, + bool IsSigned) const { + return false; + } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. Index: lib/CodeGen/ExpandMemCmp.cpp =================================================================== --- lib/CodeGen/ExpandMemCmp.cpp +++ lib/CodeGen/ExpandMemCmp.cpp @@ -66,23 +66,18 @@ // Represents the decomposition in blocks of the expansion. For example, // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. - // TODO(courbet): Involve the target more in this computation. On X86, 7 - // bytes can be done more efficiently with two overlaping 4-byte loads than - // covering the interval with [{4, 0},{2, 4},{1, 6}}. struct LoadEntry { LoadEntry(unsigned LoadSize, uint64_t Offset) : LoadSize(LoadSize), Offset(Offset) { - assert(Offset % LoadSize == 0 && "invalid load entry"); } - uint64_t getGEPIndex() const { return Offset / LoadSize; } - // The size of the load for this block, in bytes. - const unsigned LoadSize; - // The offset of this load WRT the base pointer, in bytes. 
- const uint64_t Offset; + unsigned LoadSize; + // The offset of this load from the base pointer, in bytes. + uint64_t Offset; }; - SmallVector LoadSequence; + using LoadEntryVector = SmallVector; + LoadEntryVector LoadSequence; void createLoadCmpBlocks(); void createResultBlock(); @@ -92,13 +87,23 @@ void emitLoadCompareBlock(unsigned BlockIndex); void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, unsigned &LoadIndex); - void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes); void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(); Value *getMemCmpEqZeroOneBlock(); Value *getMemCmpOneBlock(); + Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType, + uint64_t OffsetBytes); + + static LoadEntryVector + computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef LoadSizes, + unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); + static LoadEntryVector + computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize, + unsigned MaxNumLoads, + unsigned &NumLoadsNonOneByte); - public: +public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, unsigned MaxNumLoads, const bool IsUsedForZeroCmp, @@ -110,6 +115,75 @@ Value *getMemCmpExpansion(); }; +MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence( + uint64_t Size, llvm::ArrayRef LoadSizes, + const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) { + NumLoadsNonOneByte = 0; + LoadEntryVector LoadSequence; + uint64_t Offset = 0; + while (Size && !LoadSizes.empty()) { + const unsigned LoadSize = LoadSizes.front(); + const uint64_t NumLoadsForThisSize = Size / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. 
Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. + return {}; + } + if (NumLoadsForThisSize > 0) { + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } + Size = Size % LoadSize; + } + LoadSizes = LoadSizes.drop_front(); + } + return LoadSequence; +} + +MemCmpExpansion::LoadEntryVector +MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, + const unsigned MaxLoadSize, + const unsigned MaxNumLoads, + unsigned &NumLoadsNonOneByte) { + // These are already handled by the greedy approach. + if (Size < 2 || MaxLoadSize < 2) + return {}; + + // We try to do as many non-overlapping loads as possible starting from the + // beginning. + const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize; + assert(NumNonOverlappingLoads && "there must be at least one load"); + // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with + // an overlapping load. + Size = Size - NumNonOverlappingLoads * MaxLoadSize; + // Bail if the number of loads (non-overlapping + potential overlapping one) + // is larger than the max allowed. + if (NumNonOverlappingLoads + !!(Size > 0) > MaxNumLoads) + return {}; + + // Add non-overlapping loads. + LoadEntryVector LoadSequence; + uint64_t Offset = 0; + for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) { + LoadSequence.push_back({MaxLoadSize, Offset}); + Offset += MaxLoadSize; + } + + // Add the last overlapping load. + if (Size > 0) { + assert(Size < MaxLoadSize && "broken invariant"); + LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)}); + } + NumLoadsNonOneByte = 1; + return LoadSequence; +} + // Initialize the basic block structure required for expansion of memcmp call // with given maximum load size and memcmp size parameter. 
// This structure includes: @@ -133,38 +207,31 @@ Builder(CI) { assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. - size_t LoadSizeIndex = 0; - while (LoadSizeIndex < Options.LoadSizes.size() && - Options.LoadSizes[LoadSizeIndex] > Size) { - ++LoadSizeIndex; + llvm::ArrayRef LoadSizes(Options.LoadSizes); + while (!LoadSizes.empty() && LoadSizes.front() > Size) { + LoadSizes = LoadSizes.drop_front(); } - this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; + assert(!LoadSizes.empty() && "cannot load Size bytes"); + MaxLoadSize = LoadSizes.front(); // Compute the decomposition. - uint64_t CurSize = Size; - uint64_t Offset = 0; - while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { - const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; - assert(LoadSize > 0 && "zero load size"); - const uint64_t NumLoadsForThisSize = CurSize / LoadSize; - if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { - // Do not expand if the total number of loads is larger than what the - // target allows. Note that it's important that we exit before completing - // the expansion to avoid using a ton of memory to store the expansion for - // large sizes. - LoadSequence.clear(); - return; - } - if (NumLoadsForThisSize > 0) { - for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { - LoadSequence.push_back({LoadSize, Offset}); - Offset += LoadSize; - } - if (LoadSize > 1) { - ++NumLoadsNonOneByte; - } - CurSize = CurSize % LoadSize; + unsigned GreedyNumLoadsNonOneByte = 0; + LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads, + GreedyNumLoadsNonOneByte); + NumLoadsNonOneByte = GreedyNumLoadsNonOneByte; + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); + // If we allow overlapping loads and the load sequence is not already optimal, + // use overlapping loads. 
+ if (Options.AllowOverlappingLoads && + (LoadSequence.empty() || LoadSequence.size() > 2)) { + unsigned OverlappingNumLoadsNonOneByte = 0; + auto OverlappingLoads = computeOverlappingLoadSequence( + Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte); + if (!OverlappingLoads.empty() && + (LoadSequence.empty() || + OverlappingLoads.size() < LoadSequence.size())) { + LoadSequence = OverlappingLoads; + NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte; } - ++LoadSizeIndex; } assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); } @@ -189,30 +256,32 @@ EndBlock->getParent(), EndBlock); } +/// Return a pointer to an element of type `LoadSizeType` located `OffsetBytes` +/// bytes past `Source`; the byte GEP uses an i64 index so large offsets do not wrap. +Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, + Type *LoadSizeType, + uint64_t OffsetBytes) { + if (OffsetBytes > 0) { + auto *ByteType = Type::getInt8Ty(CI->getContext()); + Source = Builder.CreateGEP( + ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), + ConstantInt::get(Type::getInt64Ty(CI->getContext()), OffsetBytes)); + } + return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); +} + // This function creates the IR instructions for loading and comparing 1 byte. // It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, - unsigned GEPIndex) { - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - + unsigned OffsetBytes) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType*. 
- if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } + Value *Source1 = + getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes); + Value *Source2 = + getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes); Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); @@ -270,24 +339,10 @@ IntegerType *LoadSizeType = IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } + Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, + CurLoadEntry.Offset); + Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, + CurLoadEntry.Offset); // Get a constant or load a value for each source address. 
Value *LoadSrc1 = nullptr; @@ -378,8 +433,7 @@ const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; if (CurLoadEntry.LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, - CurLoadEntry.getGEPIndex()); + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset); return; } @@ -388,25 +442,12 @@ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } + Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, + CurLoadEntry.Offset); + Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, + CurLoadEntry.Offset); // Load LoadSizeType from the base address. Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); @@ -694,7 +735,6 @@ if (SizeVal == 0) { return false; } - // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. 
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9461,6 +9461,9 @@ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -9474,6 +9477,9 @@ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1736,6 +1736,20 @@ } break; } + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + APInt SrcUndef, SrcZero; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + KnownZero = SrcZero.zextOrTrunc(NumElts); + KnownUndef = SrcUndef.zextOrTrunc(NumElts); + break; + } case ISD::ADD: case ISD::SUB: case ISD::FADD: @@ -1755,6 +1769,9 @@ break; } case ISD::TRUNCATE: + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -4183,20 +4200,39 @@ return true; } - // Expand based on maximum range of FP_TO_SINT: - // True = fp_to_sint(Src) - // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) - // Result = select (Src < 0x8000000000000000), True, False SDValue Cst = 
DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); - // TODO: Should any fast-math-flags be set for the FSUB? - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); - False = DAG.getNode(ISD::XOR, dl, DstVT, False, - DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getSelect(dl, DstVT, Sel, True, False); + bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { + // Expand based on maximum range of FP_TO_SINT, if the value exceeds the + // signmask then offset (the result of which should be fully representable). + // Sel = Src < 0x8000000000000000 + // Val = select Sel, Src, Src - 0x8000000000000000 + // Ofs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Val) ^ Ofs + + // TODO: Should any fast-math-flags be set for the FSUB? + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); + Result = DAG.getNode(ISD::XOR, dl, DstVT, + DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + } else { + // Expand based on maximum range of FP_TO_SINT: + // True = fp_to_sint(Src) + // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) + // Result = select (Src < 0x8000000000000000), True, False + + SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + // TODO: Should any fast-math-flags be set for the FSUB? 
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + False = DAG.getNode(ISD::XOR, dl, DstVT, False, + DAG.getConstant(SignMask, dl, DstVT)); + Result = DAG.getSelect(dl, DstVT, Sel, True, False); + } return true; } Index: lib/DebugInfo/DWARF/DWARFDebugFrame.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -396,7 +396,8 @@ uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset); uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); - uint64_t ReturnAddressRegister = Data.getULEB128(&Offset); + uint64_t ReturnAddressRegister = + Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset); // Parse the augmentation data for EH CIEs StringRef AugmentationData(""); Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -65,18 +65,6 @@ def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; -def FeaturePAN : SubtargetFeature< - "pan", "HasPAN", "true", - "Enables ARM v8.1 Privileged Access-Never extension">; - -def FeatureLOR : SubtargetFeature< - "lor", "HasLOR", "true", - "Enables ARM v8.1 Limited Ordering Regions extension">; - -def FeatureVH : SubtargetFeature< - "vh", "HasVH", "true", - "Enables ARM v8.1 Virtual Host extension">; - def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable ARMv8 PMUv3 Performance Monitors extension">; @@ -89,18 +77,6 @@ def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", "Enable Statistical Profiling extension">; -def FeaturePAN_RWV : SubtargetFeature< - "pan-rwv", "HasPAN_RWV", "true", - "Enable v8.2 PAN s1e1R and s1e1W Variants", - [FeaturePAN]>; - -// UAO PState -def 
FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true", - "Enable v8.2 UAO PState">; - -def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", - "true", "Enable v8.2 data Cache Clean to Point of Persistence" >; - def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", "Enable Scalable Vector Extension (SVE) instructions">; @@ -219,66 +195,6 @@ "dotprod", "HasDotProd", "true", "Enable dot product support">; -def FeaturePA : SubtargetFeature< - "pa", "HasPA", "true", - "Enable v8.3-A Pointer Authentication enchancement">; - -def FeatureJS : SubtargetFeature< - "jsconv", "HasJS", "true", - "Enable v8.3-A JavaScript FP conversion enchancement", - [FeatureFPARMv8]>; - -def FeatureCCIDX : SubtargetFeature< - "ccidx", "HasCCIDX", "true", - "Enable v8.3-A Extend of the CCSIDR number of sets">; - -def FeatureComplxNum : SubtargetFeature< - "complxnum", "HasComplxNum", "true", - "Enable v8.3-A Floating-point complex number support", - [FeatureNEON]>; - -def FeatureNV : SubtargetFeature< - "nv", "HasNV", "true", - "Enable v8.4-A Nested Virtualization Enchancement">; - -def FeatureRASv8_4 : SubtargetFeature< - "rasv8_4", "HasRASv8_4", "true", - "Enable v8.4-A Reliability, Availability and Serviceability extension", - [FeatureRAS]>; - -def FeatureMPAM : SubtargetFeature< - "mpam", "HasMPAM", "true", - "Enable v8.4-A Memory system Partitioning and Monitoring extension">; - -def FeatureDIT : SubtargetFeature< - "dit", "HasDIT", "true", - "Enable v8.4-A Data Independent Timing instructions">; - -def FeatureTRACEV8_4 : SubtargetFeature< - "tracev8.4", "HasTRACEV8_4", "true", - "Enable v8.4-A Trace extension">; - -def FeatureAM : SubtargetFeature< - "am", "HasAM", "true", - "Enable v8.4-A Activity Monitors extension">; - -def FeatureSEL2 : SubtargetFeature< - "sel2", "HasSEL2", "true", - "Enable v8.4-A Secure Exception Level 2 extension">; - -def FeatureTLB_RMI : SubtargetFeature< - "tlb-rmi", "HasTLB_RMI", "true", - "Enable v8.4-A TLB Range and Maintenance Instructions">; - 
-def FeatureFMI : SubtargetFeature< - "fmi", "HasFMI", "true", - "Enable v8.4-A Flag Manipulation Instructions">; - -// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset -def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true", - "Enable v8.4-A RCPC instructions with Immediate Offsets", - [FeatureRCPC]>; - def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "NegativeImmediates", "false", "Convert immediates and instructions " @@ -316,7 +232,7 @@ "Enable execution and data prediction invalidation instructions" >; def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP", - "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >; + "true", "Enable Cache Clean to Point of Deep Persistence" >; def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI", "true", "Enable Branch Target Identification" >; @@ -332,22 +248,16 @@ // def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM, - FeaturePAN, FeatureLOR, FeatureVH]>; + "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>; def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, - FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; + "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>; def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA, - FeatureJS, FeatureCCIDX, FeatureComplxNum]>; + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>; def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", - "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, - FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT, - FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, - FeatureFMI, FeatureRCPC_IMMO]>; + "Support ARM v8.4a instructions", [HasV8_3aOps, 
FeatureDotProd]>; def HasV8_5aOps : SubtargetFeature< "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", Index: lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- lib/Target/AArch64/AArch64InstrFormats.td +++ lib/Target/AArch64/AArch64InstrFormats.td @@ -9989,10 +9989,9 @@ let Inst{4-0} = Rd; } -//8.3 CompNum - Floating-point complex number support multiclass SIMDThreeSameVectorComplexHSD opcode, Operand rottype, string asm, SDPatternOperator OpNode>{ - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype, asm, ".4h", [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), @@ -10008,7 +10007,7 @@ (rottype i32:$rot)))]>; } - let Predicates = [HasComplxNum, HasNEON] in { + let Predicates = [HasV8_3a, HasNEON] in { def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), @@ -10064,7 +10063,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD opcode, Operand rottype, string asm, SDPatternOperator OpNode> { - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64, rottype, asm, ".4h", [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), @@ -10080,7 +10079,7 @@ (rottype i32:$rot)))]>; } - let Predicates = [HasComplxNum, HasNEON] in { + let Predicates = [HasV8_3a, HasNEON] in { def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64, rottype, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), @@ -10146,7 +10145,7 @@ // classes. 
multiclass SIMDIndexedTiedComplexHSD { - let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in { def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64, V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h", ".4h", ".h", []> { @@ -10162,9 +10161,9 @@ let Inst{11} = idx{1}; let Inst{21} = idx{0}; } - } // Predicates = HasComplxNum, HasNEON, HasFullFP16] + } // Predicates = [HasV8_3a,HasNEON,HasFullFP16] - let Predicates = [HasComplxNum, HasNEON] in { + let Predicates = [HasV8_3a,HasNEON] in { def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2, V128, V128, V128, VectorIndexD, rottype, asm, ".4s", ".4s", ".4s", ".s", []> { @@ -10172,7 +10171,7 @@ let Inst{11} = idx{0}; let Inst{21} = 0; } - } // Predicates = [HasComplxNum, HasNEON] + } // Predicates = [HasV8_3a,HasNEON] } //---------------------------------------------------------------------------- Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2382,9 +2382,8 @@ if (BaseOp1.getType() != BaseOp2.getType()) return false; - assert(BaseOp1.isReg() || - BaseOp1.isFI() && - "Only base registers and frame indices are supported."); + assert((BaseOp1.isReg() || BaseOp1.isFI()) && + "Only base registers and frame indices are supported."); // Check for both base regs and base FI. 
if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -24,54 +24,6 @@ AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; -def HasVH : Predicate<"Subtarget->hasVH()">, - AssemblerPredicate<"FeatureVH", "vh">; - -def HasLOR : Predicate<"Subtarget->hasLOR()">, - AssemblerPredicate<"FeatureLOR", "lor">; - -def HasPA : Predicate<"Subtarget->hasPA()">, - AssemblerPredicate<"FeaturePA", "pa">; - -def HasJS : Predicate<"Subtarget->hasJS()">, - AssemblerPredicate<"FeatureJS", "jsconv">; - -def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, - AssemblerPredicate<"FeatureCCIDX", "ccidx">; - -def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, - AssemblerPredicate<"FeatureComplxNum", "complxnum">; - -def HasNV : Predicate<"Subtarget->hasNV()">, - AssemblerPredicate<"FeatureNV", "nv">; - -def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">, - AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">; - -def HasMPAM : Predicate<"Subtarget->hasMPAM()">, - AssemblerPredicate<"FeatureMPAM", "mpam">; - -def HasDIT : Predicate<"Subtarget->hasDIT()">, - AssemblerPredicate<"FeatureDIT", "dit">; - -def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, - AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">; - -def HasAM : Predicate<"Subtarget->hasAM()">, - AssemblerPredicate<"FeatureAM", "am">; - -def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, - AssemblerPredicate<"FeatureSEL2", "sel2">; - -def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, - AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">; - -def HasFMI : Predicate<"Subtarget->hasFMI()">, - AssemblerPredicate<"FeatureFMI", "fmi">; - -def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, - 
AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">; - def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, @@ -558,7 +510,7 @@ def TSB : CRmSystemI { let CRm = 0b0010; let Inst{12} = 0; - let Predicates = [HasTRACEV8_4]; + let Predicates = [HasV8_4a]; } } @@ -650,7 +602,7 @@ } // These pointer authentication isntructions require armv8.3a -let Predicates = [HasPA] in { +let Predicates = [HasV8_3a] in { multiclass SignAuth prefix, bits<3> prefix_z, string asm> { def IA : SignAuthOneData; def IB : SignAuthOneData; @@ -690,17 +642,17 @@ defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; -} + // v8.3a floating point conversion for javascript + let Predicates = [HasV8_3a, HasFPARMv8] in + def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, + "fjcvtzs", []> { + let Inst{31} = 0; + } -// v8.3a floating point conversion for javascript -let Predicates = [HasJS, HasFPARMv8] in -def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, - "fjcvtzs", []> { - let Inst{31} = 0; -} // HasJS, HasFPARMv8 +} // HasV8_3a // v8.4 Flag manipulation instructions -let Predicates = [HasFMI] in { +let Predicates = [HasV8_4a] in { def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { let Inst{20-5} = 0b0000001000000000; } @@ -708,7 +660,7 @@ def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", "{\t$Rn, $imm, $mask}">; -} // HasFMI +} // HasV8_4a // v8.5 flag manipulation instructions let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { @@ -2677,9 +2629,8 @@ [(truncstorei8 GPR32z:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; -// Armv8.4 Weaker Release Consistency enhancements -// LDAPR & STLR with Immediate Offset instructions -let Predicates = [HasRCPC_IMMO] in { +// Armv8.4 LDAPR & 
STLR with Immediate Offset instruction +let Predicates = [HasV8_4a] in { defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; @@ -2964,7 +2915,7 @@ def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; -let Predicates = [HasLOR] in { +let Predicates = [HasV8_1a] in { // v8.1a "Limited Order Region" extension load-acquire instructions def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -83,33 +83,6 @@ bool HasFP16FML = false; bool HasSPE = false; - // ARMv8.1 extensions - bool HasVH = false; - bool HasPAN = false; - bool HasLOR = false; - - // ARMv8.2 extensions - bool HasPsUAO = false; - bool HasPAN_RWV = false; - bool HasCCPP = false; - - // ARMv8.3 extensions - bool HasPA = false; - bool HasJS = false; - bool HasCCIDX = false; - bool HasComplxNum = false; - - // ARMv8.4 extensions - bool HasNV = false; - bool HasRASv8_4 = false; - bool HasMPAM = false; - bool HasDIT = false; - bool HasTRACEV8_4 = false; - bool HasAM = false; - bool HasSEL2 = false; - bool HasTLB_RMI = false; - bool HasFMI = false; - bool HasRCPC_IMMO = false; // ARMv8.4 Crypto extensions bool HasSM4 = true; bool HasSHA3 = true; @@ -378,30 +351,6 @@ bool useAA() const override { return UseAA; } - bool hasVH() const { return HasVH; } - bool hasPAN() const { return HasPAN; } - bool hasLOR() const { return HasLOR; } - - bool hasPsUAO() const { return HasPsUAO; } - bool hasPAN_RWV() const { return HasPAN_RWV; } - bool hasCCPP() const { return HasCCPP; } - - bool hasPA() const { return HasPA; } - bool hasJS() const { return 
HasJS; } - bool hasCCIDX() const { return HasCCIDX; } - bool hasComplxNum() const { return HasComplxNum; } - - bool hasNV() const { return HasNV; } - bool hasRASv8_4() const { return HasRASv8_4; } - bool hasMPAM() const { return HasMPAM; } - bool hasDIT() const { return HasDIT; } - bool hasTRACEV8_4() const { return HasTRACEV8_4; } - bool hasAM() const { return HasAM; } - bool hasSEL2() const { return HasSEL2; } - bool hasTLB_RMI() const { return HasTLB_RMI; } - bool hasFMI() const { return HasFMI; } - bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } - bool useSmallAddressing() const { switch (TLInfo.getTargetMachine().getCodeModel()) { case CodeModel::Kernel: Index: lib/Target/AArch64/AArch64SystemOperands.td =================================================================== --- lib/Target/AArch64/AArch64SystemOperands.td +++ lib/Target/AArch64/AArch64SystemOperands.td @@ -14,25 +14,6 @@ include "llvm/TableGen/SearchableTable.td" -//===----------------------------------------------------------------------===// -// Features that, for the compiler, only enable system operands and PStates -//===----------------------------------------------------------------------===// - -def HasCCPP : Predicate<"Subtarget->hasCCPP()">, - AssemblerPredicate<"FeatureCCPP", "ccpp">; - -def HasPAN : Predicate<"Subtarget->hasPAN()">, - AssemblerPredicate<"FeaturePAN", - "ARM v8.1 Privileged Access-Never extension">; - -def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">, - AssemblerPredicate<"FeaturePsUAO", - "ARM v8.2 UAO PState extension (psuao)">; - -def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">, - AssemblerPredicate<"FeaturePAN_RWV", - "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">; - //===----------------------------------------------------------------------===// // AT (address translate) instruction options. 
//===----------------------------------------------------------------------===// @@ -64,7 +45,7 @@ def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>; def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>; -let Requires = [{ {AArch64::FeaturePAN_RWV} }] in { +let Requires = [{ {AArch64::HasV8_2aOps} }] in { def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>; def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>; } @@ -121,7 +102,7 @@ def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>; def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>; -let Requires = [{ {AArch64::FeatureCCPP} }] in +let Requires = [{ {AArch64::HasV8_2aOps} }] in def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>; let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in @@ -197,7 +178,7 @@ bits<4> Encoding; let Encoding = encoding; - code Requires = [{ {AArch64::FeatureTRACEV8_4} }]; + code Requires = [{ {AArch64::HasV8_4aOps} }]; } def : TSB<"csync", 0>; @@ -333,14 +314,13 @@ def : PState<"DAIFSet", 0b11110>; def : PState<"DAIFClr", 0b11111>; // v8.1a "Privileged Access Never" extension-specific PStates -let Requires = [{ {AArch64::FeaturePAN} }] in +let Requires = [{ {AArch64::HasV8_1aOps} }] in def : PState<"PAN", 0b00100>; - // v8.2a "User Access Override" extension-specific PStates -let Requires = [{ {AArch64::FeaturePsUAO} }] in +let Requires = [{ {AArch64::HasV8_2aOps} }] in def : PState<"UAO", 0b00011>; // v8.4a timining insensitivity of data processing instructions -let Requires = [{ {AArch64::FeatureDIT} }] in +let Requires = [{ {AArch64::HasV8_4aOps} }] in def : PState<"DIT", 0b11010>; // v8.5a Spectre Mitigation let Requires = [{ {AArch64::FeatureSSBS} }] in @@ -433,9 +413,8 @@ def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; -// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) -let Requires = [{ {AArch64::FeatureTLB_RMI} }] in { // Armv8.4-A Outer Sharable TLB Maintenance instructions: +let Requires = [{ {AArch64::HasV8_4aOps} }] in { // 
op1 CRn CRm op2 def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; @@ -486,7 +465,7 @@ def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; -} //FeatureTLB_RMI +} // Armv8.5-A Prediction Restriction by Context instruction options: class PRCTX crm> : SearchableTable { @@ -561,10 +540,8 @@ def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>; def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>; def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>; - -//v8.3 CCIDX - extending the CCsIDr number of sets def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> { - let Requires = [{ {AArch64::FeatureCCIDX} }]; + let Requires = [{ {AArch64::HasV8_3aOps} }]; } def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>; def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>; @@ -602,7 +579,9 @@ def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>; def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>; def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>; -def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>; +def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> { + let Requires = [{ {AArch64::HasV8_2aOps} }]; +} def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>; def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>; def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>; @@ -672,7 +651,7 @@ // v8.1a "Limited Ordering Regions" extension-specific system register // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureLOR} }] in +let Requires = [{ {AArch64::HasV8_1aOps} }] in def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>; // v8.2a "RAS extension" registers @@ -1206,21 +1185,21 @@ def : RWSysReg<"ICH_LR15_EL2", 0b11, 
0b100, 0b1100, 0b1101, 0b111>; // v8.1a "Privileged Access Never" extension-specific system registers -let Requires = [{ {AArch64::FeaturePAN} }] in +let Requires = [{ {AArch64::HasV8_1aOps} }] in def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>; // v8.1a "Limited Ordering Regions" extension-specific system registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureLOR} }] in { +let Requires = [{ {AArch64::HasV8_1aOps} }] in { def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>; def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>; def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>; def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>; } -// v8.1a "Virtualization Host extensions" system registers +// v8.1a "Virtualization Host extensions" system registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureVH} }] in { +let Requires = [{ {AArch64::HasV8_1aOps} }] in { def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>; def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>; @@ -1251,7 +1230,7 @@ } // v8.2a registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeaturePsUAO} }] in +let Requires = [{ {AArch64::HasV8_2aOps} }] in def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>; // v8.2a "Statistical Profiling extension" registers @@ -1288,7 +1267,7 @@ // v8.3a "Pointer authentication extension" registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeaturePA} }] in { +let Requires = [{ {AArch64::HasV8_3aOps} }] in { def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>; def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>; def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>; @@ -1301,8 +1280,8 @@ def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>; } -// v8.4 "Secure Exception Level 2 extension" -let Requires = [{ 
{AArch64::FeatureSEL2} }] in { +let Requires = [{ {AArch64::HasV8_4aOps} }] in { + // v8.4a "Virtualization secure second stage translation" registers // Op0 Op1 CRn CRm Op2 def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>; @@ -1320,22 +1299,18 @@ // v8.4a "Virtualization debug state" registers // Op0 Op1 CRn CRm Op2 def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>; -} // FeatureSEL2 // v8.4a RAS registers -// Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureRASv8_4} }] in { +// Op0 Op1 CRn CRm Op2 def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>; def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>; def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>; def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>; def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>; def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>; -} // FeatureRASv8_4 // v8.4a MPAM registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureMPAM} }] in { def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>; def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>; def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>; @@ -1352,11 +1327,9 @@ def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>; def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>; def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>; -} //FeatureMPAM -// v8.4a Activitiy Monitor registers +// v8.4a Activitiy monitor registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureAM} }] in { def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>; def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>; def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>; @@ -1405,7 +1378,6 @@ def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>; def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 
0b1111, 0b110>; def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>; -} //FeatureAM // v8.4a Trace Extension registers // @@ -1414,24 +1386,19 @@ // but they are already defined above. // // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in { def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>; def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>; def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>; -} //FeatureTRACEV8_4 // v8.4a Timining insensitivity of data processing instructions -// DIT: Data Independent Timing instructions // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureDIT} }] in { def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>; -} //FeatureDIT // v8.4a Enhanced Support for Nested Virtualization // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureNV} }] in { def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>; -} //FeatureNV + +} // HasV8_4aOps // SVE control registers // Op0 Op1 CRn CRm Op2 Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2813,29 +2813,28 @@ const char *Name; const FeatureBitset Features; } ExtensionMap[] = { - {"crc", {AArch64::FeatureCRC}}, - {"sm4", {AArch64::FeatureSM4}}, - {"sha3", {AArch64::FeatureSHA3}}, - {"sha2", {AArch64::FeatureSHA2}}, - {"aes", {AArch64::FeatureAES}}, - {"crypto", {AArch64::FeatureCrypto}}, - {"fp", {AArch64::FeatureFPARMv8}}, - {"simd", {AArch64::FeatureNEON}}, - {"ras", {AArch64::FeatureRAS}}, - {"lse", {AArch64::FeatureLSE}}, - {"predctrl", {AArch64::FeaturePredCtrl}}, - {"ccdp", {AArch64::FeatureCacheDeepPersist}}, - {"mte", {AArch64::FeatureMTE}}, - {"tlb-rmi", {AArch64::FeatureTLB_RMI}}, - {"pan-rwv", {AArch64::FeaturePAN_RWV}}, - {"ccpp", {AArch64::FeatureCCPP}}, - // FIXME: Unsupported extensions - 
{"pan", {}}, - {"lor", {}}, - {"rdma", {}}, - {"profile", {}}, + { "crc", {AArch64::FeatureCRC} }, + { "sm4", {AArch64::FeatureSM4} }, + { "sha3", {AArch64::FeatureSHA3} }, + { "sha2", {AArch64::FeatureSHA2} }, + { "aes", {AArch64::FeatureAES} }, + { "crypto", {AArch64::FeatureCrypto} }, + { "fp", {AArch64::FeatureFPARMv8} }, + { "simd", {AArch64::FeatureNEON} }, + { "ras", {AArch64::FeatureRAS} }, + { "lse", {AArch64::FeatureLSE} }, + { "predctrl", {AArch64::FeaturePredCtrl} }, + { "ccdp", {AArch64::FeatureCacheDeepPersist} }, + { "mte", {AArch64::FeatureMTE} }, + + // FIXME: Unsupported extensions + { "pan", {} }, + { "lor", {} }, + { "rdma", {} }, + { "profile", {} }, }; + static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { if (FBS[AArch64::HasV8_1aOps]) Str += "ARMv8.1a"; Index: lib/Target/Hexagon/HexagonDepDecoders.h =================================================================== --- lib/Target/Hexagon/HexagonDepDecoders.h +++ lib/Target/Hexagon/HexagonDepDecoders.h @@ -11,8 +11,10 @@ // clang-format off +#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-function" +#endif static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, const void *Decoder) { @@ -69,5 +71,9 @@ signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } + +#if defined(__clang__) #pragma clang diagnostic pop +#endif + // clang-format on Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1047,6 +1047,9 @@ bool decomposeMulByConstant(EVT VT, SDValue C) const override; + bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, + bool IsSigned) const override; + /// Return true if EXTRACT_SUBVECTOR is cheap for this result type /// with this index. 
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4812,6 +4812,12 @@ (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2(); } +bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, + bool IsSigned) const { + // f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available. + return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov(); +} + bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) @@ -24086,8 +24092,6 @@ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false); unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true); - Amt = peekThroughEXTRACT_SUBVECTORs(Amt); - if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) { if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) { MVT EltVT = VT.getVectorElementType(); Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -1886,7 +1886,7 @@ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/ }; static const CostTblEntry X64CostTbl[] = { // 64-bit targets - { ISD::BITREVERSE, MVT::i64, 14 } + { ISD::BITREVERSE, MVT::i64, 14 } }; static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets { ISD::BITREVERSE, MVT::i32, 14 }, @@ -2899,6 +2899,8 @@ Options.LoadSizes.push_back(4); Options.LoadSizes.push_back(2); Options.LoadSizes.push_back(1); + // All GPR loads can be unaligned, and vector loads too starting from SSE2. + Options.AllowOverlappingLoads = true; return Options; }(); return IsZeroCmp ? 
&EqZeroOptions : &ThreeWayOptions; Index: test/CodeGen/X86/fp-cvt.ll =================================================================== --- test/CodeGen/X86/fp-cvt.ll +++ test/CodeGen/X86/fp-cvt.ll @@ -483,29 +483,20 @@ ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fxch %st(1) +; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fld %st(1) +; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fucompi %st(1) -; X64-X87-NEXT: fstp %st(0) -; X64-X87-NEXT: jbe .LBB10_1 -; X64-X87-NEXT: # %bb.2: -; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-X87-NEXT: retq -; X64-X87-NEXT: .LBB10_1: -; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-X87-NEXT: retq ; @@ -515,17 +506,14 @@ ; X64-SSSE3-NEXT: flds {{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) ; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fxch %st(1) +; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; 
X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fucompi %st(1) -; X64-SSSE3-NEXT: fstp %st(0) -; X64-SSSE3-NEXT: jbe .LBB10_1 -; X64-SSSE3-NEXT: # %bb.2: -; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-SSSE3-NEXT: retq -; X64-SSSE3-NEXT: .LBB10_1: -; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-SSSE3-NEXT: setbe %al +; X64-SSSE3-NEXT: shlq $63, %rax ; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-SSSE3-NEXT: retq %1 = fptoui x86_fp80 %a0 to i64 @@ -577,29 +565,20 @@ ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fxch %st(1) +; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fld %st(1) +; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fucompi %st(1) -; X64-X87-NEXT: fstp %st(0) -; X64-X87-NEXT: jbe .LBB11_1 -; X64-X87-NEXT: # %bb.2: -; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-X87-NEXT: retq -; X64-X87-NEXT: .LBB11_1: -; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-X87-NEXT: retq ; 
@@ -609,17 +588,14 @@ ; X64-SSSE3-NEXT: flds {{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) ; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fxch %st(1) +; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fucompi %st(1) -; X64-SSSE3-NEXT: fstp %st(0) -; X64-SSSE3-NEXT: jbe .LBB11_1 -; X64-SSSE3-NEXT: # %bb.2: -; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-SSSE3-NEXT: retq -; X64-SSSE3-NEXT: .LBB11_1: -; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-SSSE3-NEXT: setbe %al +; X64-SSSE3-NEXT: shlq $63, %rax ; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-SSSE3-NEXT: retq %1 = load x86_fp80, x86_fp80 *%a0 Index: test/CodeGen/X86/memcmp-optsize.ll =================================================================== --- test/CodeGen/X86/memcmp-optsize.ll +++ test/CodeGen/X86/memcmp-optsize.ll @@ -639,17 +639,33 @@ } define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize { -; X86-LABEL: length24_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: length24_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: 
movdqu 8(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl ; ; X64-SSE2-LABEL: length24_eq: ; X64-SSE2: # %bb.0: @@ -683,17 +699,30 @@ } define i1 @length24_eq_const(i8* %X) nounwind optsize { -; X86-LABEL: length24_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $24 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: length24_eq_const: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqu (%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pand %xmm1, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: setne %al +; X86-SSE2-NEXT: retl ; ; X64-SSE2-LABEL: length24_eq_const: ; X64-SSE2: # %bb.0: Index: test/CodeGen/X86/memcmp.ll =================================================================== --- test/CodeGen/X86/memcmp.ll +++ test/CodeGen/X86/memcmp.ll @@ -359,6 +359,33 @@ ret i1 %c } +define i1 @length7_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length7_eq: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %edx +; X86-NEXT: movl 3(%ecx), %ecx +; X86-NEXT: xorl (%eax), %edx +; X86-NEXT: xorl 3(%eax), %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length7_eq: +; X64: # %bb.0: +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl 3(%rdi), %ecx +; X64-NEXT: xorl (%rsi), %eax +; X64-NEXT: xorl 3(%rsi), %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: setne %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length8: ; X86: # %bb.0: @@ -370,7 +397,7 @@ ; X86-NEXT: bswapl %ecx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB13_2 +; X86-NEXT: jne .LBB14_2 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx ; X86-NEXT: movl 4(%eax), %edx @@ -378,13 +405,13 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB13_3 -; X86-NEXT: .LBB13_2: # %res_block +; X86-NEXT: je .LBB14_3 +; X86-NEXT: .LBB14_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB13_3: # %endblock +; X86-NEXT: .LBB14_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -450,6 +477,89 @@ ret i1 %c } +define i1 @length9_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length9_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $9 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length9_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: movb 8(%rdi), %cl +; X64-NEXT: xorb 8(%rsi), %cl +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 
@memcmp(i8* %X, i8* %Y, i64 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length10_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length10_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $10 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length10_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: movzwl 8(%rdi), %ecx +; X64-NEXT: xorw 8(%rsi), %cx +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length11_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length11_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $11 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length11_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 3(%rdi), %rcx +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: xorq 3(%rsi), %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + define i1 @length12_eq(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length12_eq: ; X86: # %bb.0: @@ -495,7 +605,7 @@ ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB17_2 +; X64-NEXT: jne .LBB21_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx @@ -503,18 +613,99 @@ ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB17_3 -; X64-NEXT: .LBB17_2: # %res_block +; X64-NEXT: je .LBB21_3 +; 
X64-NEXT: .LBB21_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: setae %al ; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB17_3: # %endblock +; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m } +define i1 @length13_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length13_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $13 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length13_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 5(%rdi), %rcx +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: xorq 5(%rsi), %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length14_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $14 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length14_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 6(%rdi), %rcx +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: xorq 6(%rsi), %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_eq(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length15_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $15 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; 
X86-NEXT: retl +; +; X64-LABEL: length15_eq: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 7(%rdi), %rcx +; X64-NEXT: xorq (%rsi), %rax +; X64-NEXT: xorq 7(%rsi), %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: sete %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + ; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 define i32 @length16(i8* %X, i8* %Y) nounwind { @@ -535,7 +726,7 @@ ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB18_2 +; X64-NEXT: jne .LBB25_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx ; X64-NEXT: movq 8(%rsi), %rdx @@ -543,13 +734,13 @@ ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB18_3 -; X64-NEXT: .LBB18_2: # %res_block +; X64-NEXT: je .LBB25_3 +; X64-NEXT: .LBB25_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: setae %al ; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB18_3: # %endblock +; X64-NEXT: .LBB25_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -694,17 +885,45 @@ } define i1 @length24_eq(i8* %x, i8* %y) nounwind { -; X86-LABEL: length24_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length24_eq: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; 
X86-SSE1-NEXT: pushl $24 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length24_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl ; ; X64-SSE2-LABEL: length24_eq: ; X64-SSE2: # %bb.0: @@ -738,17 +957,42 @@ } define i1 @length24_eq_const(i8* %X) nounwind { -; X86-LABEL: length24_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $24 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length24_eq_const: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $24 +; X86-SSE1-NEXT: pushl $.L.str +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: setne %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length24_eq_const: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqu (%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pand %xmm1, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: setne %al +; X86-SSE2-NEXT: retl ; ; X64-SSE2-LABEL: length24_eq_const: ; X64-SSE2: # %bb.0: @@ -1100,5 +1344,3 @@ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind ret i32 %m } - - Index: test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- test/CodeGen/X86/scalar-fp-to-i64.ll +++ test/CodeGen/X86/scalar-fp-to-i64.ll @@ -1147,25 +1147,21 @@ ; ; SSE3_64_WIN-LABEL: x_to_u64: ; SSE3_64_WIN: # %bb.0: -; SSE3_64_WIN-NEXT: subq $16, %rsp +; SSE3_64_WIN-NEXT: pushq %rax ; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE3_64_WIN-NEXT: fld %st(1) ; SSE3_64_WIN-NEXT: fsub %st(1) -; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp) -; SSE3_64_WIN-NEXT: fld %st(1) +; SSE3_64_WIN-NEXT: xorl %eax, %eax +; SSE3_64_WIN-NEXT: fxch %st(1) +; SSE3_64_WIN-NEXT: fucompi %st(2) +; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_WIN-NEXT: fstp %st(1) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) -; SSE3_64_WIN-NEXT: fucompi %st(1) -; SSE3_64_WIN-NEXT: fstp %st(0) -; SSE3_64_WIN-NEXT: jbe .LBB4_1 -; SSE3_64_WIN-NEXT: # %bb.2: -; SSE3_64_WIN-NEXT: movq (%rsp), %rax -; SSE3_64_WIN-NEXT: addq $16, %rsp -; SSE3_64_WIN-NEXT: retq -; SSE3_64_WIN-NEXT: .LBB4_1: -; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; SSE3_64_WIN-NEXT: addq $16, %rsp +; SSE3_64_WIN-NEXT: setbe %al +; SSE3_64_WIN-NEXT: shlq $63, %rax +; SSE3_64_WIN-NEXT: xorq (%rsp), %rax +; SSE3_64_WIN-NEXT: popq %rcx ; SSE3_64_WIN-NEXT: retq ; ; SSE3_64_LIN-LABEL: x_to_u64: @@ -1174,17 +1170,14 @@ ; 
SSE3_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE3_64_LIN-NEXT: fld %st(1) ; SSE3_64_LIN-NEXT: fsub %st(1) +; SSE3_64_LIN-NEXT: xorl %eax, %eax +; SSE3_64_LIN-NEXT: fxch %st(1) +; SSE3_64_LIN-NEXT: fucompi %st(2) +; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_LIN-NEXT: fstp %st(1) ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; SSE3_64_LIN-NEXT: fld %st(1) -; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; SSE3_64_LIN-NEXT: fucompi %st(1) -; SSE3_64_LIN-NEXT: fstp %st(0) -; SSE3_64_LIN-NEXT: jbe .LBB4_1 -; SSE3_64_LIN-NEXT: # %bb.2: -; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; SSE3_64_LIN-NEXT: retq -; SSE3_64_LIN-NEXT: .LBB4_1: -; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE3_64_LIN-NEXT: setbe %al +; SSE3_64_LIN-NEXT: shlq $63, %rax ; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; SSE3_64_LIN-NEXT: retq ; @@ -1246,37 +1239,27 @@ ; ; SSE2_64_WIN-LABEL: x_to_u64: ; SSE2_64_WIN: # %bb.0: -; SSE2_64_WIN-NEXT: subq $24, %rsp +; SSE2_64_WIN-NEXT: subq $16, %rsp ; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE2_64_WIN-NEXT: fld %st(1) ; SSE2_64_WIN-NEXT: fsub %st(1) +; SSE2_64_WIN-NEXT: xorl %eax, %eax +; SSE2_64_WIN-NEXT: fxch %st(1) +; SSE2_64_WIN-NEXT: fucompi %st(2) +; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_WIN-NEXT: fstp %st(1) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F -; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; 
SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fld %st(1) -; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fucompi %st(1) -; SSE2_64_WIN-NEXT: fstp %st(0) -; SSE2_64_WIN-NEXT: jbe .LBB4_1 -; SSE2_64_WIN-NEXT: # %bb.2: -; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax -; SSE2_64_WIN-NEXT: addq $24, %rsp -; SSE2_64_WIN-NEXT: retq -; SSE2_64_WIN-NEXT: .LBB4_1: -; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE2_64_WIN-NEXT: setbe %al +; SSE2_64_WIN-NEXT: shlq $63, %rax ; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; SSE2_64_WIN-NEXT: addq $24, %rsp +; SSE2_64_WIN-NEXT: addq $16, %rsp ; SSE2_64_WIN-NEXT: retq ; ; SSE2_64_LIN-LABEL: x_to_u64: @@ -1285,29 +1268,20 @@ ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE2_64_LIN-NEXT: fld %st(1) ; SSE2_64_LIN-NEXT: fsub %st(1) +; SSE2_64_LIN-NEXT: xorl %eax, %eax +; SSE2_64_LIN-NEXT: fxch %st(1) +; SSE2_64_LIN-NEXT: fucompi %st(2) +; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_LIN-NEXT: fstp %st(1) ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fld %st(1) -; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fucompi %st(1) -; SSE2_64_LIN-NEXT: fstp %st(0) -; SSE2_64_LIN-NEXT: jbe 
.LBB4_1 -; SSE2_64_LIN-NEXT: # %bb.2: -; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; SSE2_64_LIN-NEXT: retq -; SSE2_64_LIN-NEXT: .LBB4_1: -; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE2_64_LIN-NEXT: setbe %al +; SSE2_64_LIN-NEXT: shlq $63, %rax ; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; SSE2_64_LIN-NEXT: retq ; Index: test/CodeGen/X86/trunc-subvector.ll =================================================================== --- test/CodeGen/X86/trunc-subvector.ll +++ test/CodeGen/X86/trunc-subvector.ll @@ -108,6 +108,7 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3] @@ -227,6 +228,7 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3] Index: test/CodeGen/X86/vector-rotate-128.ll =================================================================== --- test/CodeGen/X86/vector-rotate-128.ll +++ test/CodeGen/X86/vector-rotate-128.ll @@ -761,7 +761,6 @@ ; SSE41-LABEL: splatvar_rotate_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: pslld %xmm2, %xmm3 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32] @@ -774,7 +773,6 @@ ; AVX1-LABEL: 
splatvar_rotate_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32] ; AVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1 @@ -786,7 +784,6 @@ ; AVX2-LABEL: splatvar_rotate_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 @@ -876,8 +873,6 @@ ; SSE41-LABEL: splatvar_rotate_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: psllw %xmm2, %xmm3 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] @@ -887,35 +882,20 @@ ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: splatvar_rotate_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_rotate_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_rotate_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: retq ; ; AVX512-LABEL: splatvar_rotate_v8i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 Index: test/CodeGen/X86/vector-rotate-256.ll =================================================================== --- test/CodeGen/X86/vector-rotate-256.ll +++ test/CodeGen/X86/vector-rotate-256.ll @@ -602,14 +602,13 @@ ; ; AVX2-LABEL: splatvar_rotate_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm1, %ymm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero -; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm1 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpslld %xmm2, %ymm0, %ymm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] -; AVX2-NEXT: vpsubd %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX2-NEXT: vpsrld %xmm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: 
splatvar_rotate_v8i32: @@ -687,26 +686,24 @@ ; ; AVX2-LABEL: splatvar_rotate_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %ymm2 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm1 +; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_rotate_v16i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %ymm2 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm1 +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_rotate_v16i16: Index: test/CodeGen/X86/vector-rotate-512.ll =================================================================== --- test/CodeGen/X86/vector-rotate-512.ll +++ 
test/CodeGen/X86/vector-rotate-512.ll @@ -343,26 +343,24 @@ ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> 
zeroinitializer %splat16 = sub <32 x i16> , %splat Index: test/MC/AArch64/armv8.2a-at.s =================================================================== --- test/MC/AArch64/armv8.2a-at.s +++ test/MC/AArch64/armv8.2a-at.s @@ -1,11 +1,9 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a %s -o - | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a,+pan-rwv %s -o - | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-pan-rwv %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR at s1e1rp, x1 at s1e1wp, x2 // CHECK: at s1e1rp, x1 // encoding: [0x01,0x79,0x08,0xd5] // CHECK: at s1e1wp, x2 // encoding: [0x22,0x79,0x08,0xd5] -// ERROR: error: AT S1E1RP requires pan-rwv -// ERROR: error: AT S1E1WP requires pan-rwv +// ERROR: error: AT S1E1RP requires ARMv8.2a +// ERROR: error: AT S1E1WP requires ARMv8.2a Index: test/MC/AArch64/armv8.2a-mmfr2.s =================================================================== --- /dev/null +++ test/MC/AArch64/armv8.2a-mmfr2.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR + + mrs x3, id_aa64mmfr2_el1 +// CHECK: mrs x3, ID_AA64MMFR2_EL1 // encoding: [0x43,0x07,0x38,0xd5] +// ERROR: error: expected readable system register Index: test/MC/AArch64/armv8.2a-persistent-memory.s =================================================================== --- test/MC/AArch64/armv8.2a-persistent-memory.s +++ 
test/MC/AArch64/armv8.2a-persistent-memory.s @@ -1,7 +1,6 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a -o - %s | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+ccpp -o - %s | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a -o - %s 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR dc cvap, x7 // CHECK: dc cvap, x7 // encoding: [0x27,0x7c,0x0b,0xd5] -// ERROR: error: DC CVAP requires ccpp +// ERROR: error: DC CVAP requires ARMv8.2a Index: test/MC/AArch64/armv8.3a-complex.s =================================================================== --- test/MC/AArch64/armv8.3a-complex.s +++ test/MC/AArch64/armv8.3a-complex.s @@ -1,44 +1,43 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,-fullfp16 -o - %s 2>%t | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=NO-FP16 +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,-fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FP16 // RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-NO-FP16 %s < %t -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,+fullfp16 -o - %s 2>%t | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=FP16 +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,+fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 // RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-FP16 %s < %t -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-v8.3a,+fullfp16,+complxnum -o - %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=FP16 +// RUN: not llvm-mc -triple 
aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-v8.3a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A + + // ==== FCMLA vector ==== // Types fcmla v0.4h, v1.4h, v2.4h, #0 // FP16: fcmla v0.4h, v1.4h, v2.4h, #0 // encoding: [0x20,0xc4,0x42,0x2e] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.8h, v1.8h, v2.8h, #0 // FP16: fcmla v0.8h, v1.8h, v2.8h, #0 // encoding: [0x20,0xc4,0x42,0x6e] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2s, v1.2s, v2.2s, #0 // CHECK: fcmla v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.4s, v1.4s, v2.4s, #0 // CHECK: fcmla v0.4s, v1.4s, v2.4s, #0 // encoding: [0x20,0xc4,0x82,0x6e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2d, v1.2d, v2.2d, #0 // CHECK: fcmla v0.2d, v1.2d, v2.2d, #0 // encoding: [0x20,0xc4,0xc2,0x6e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Rotations fcmla v0.2s, v1.2s, v2.2s, #0 // CHECK: fcmla v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2s, v1.2s, v2.2s, #90 // CHECK: fcmla v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xcc,0x82,0x2e] -// NO-V83A: 
:[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2s, v1.2s, v2.2s, #180 // CHECK: fcmla v0.2s, v1.2s, v2.2s, #180 // encoding: [0x20,0xd4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2s, v1.2s, v2.2s, #270 // CHECK: fcmla v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xdc,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Invalid rotations fcmla v0.2s, v1.2s, v2.2s, #1 @@ -53,28 +52,28 @@ fcadd v0.4h, v1.4h, v2.4h, #90 // FP16: fcadd v0.4h, v1.4h, v2.4h, #90 // encoding: [0x20,0xe4,0x42,0x2e] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcadd v0.8h, v1.8h, v2.8h, #90 // FP16: fcadd v0.8h, v1.8h, v2.8h, #90 // encoding: [0x20,0xe4,0x42,0x6e] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcadd v0.2s, v1.2s, v2.2s, #90 // CHECK: fcadd v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcadd v0.4s, v1.4s, v2.4s, #90 // CHECK: fcadd v0.4s, v1.4s, v2.4s, #90 // encoding: [0x20,0xe4,0x82,0x6e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcadd v0.2d, v1.2d, v2.2d, #90 // CHECK: fcadd v0.2d, v1.2d, 
v2.2d, #90 // encoding: [0x20,0xe4,0xc2,0x6e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Rotations fcadd v0.2s, v1.2s, v2.2s, #90 // CHECK: fcadd v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcadd v0.2s, v1.2s, v2.2s, #270 // CHECK: fcadd v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xf4,0x82,0x2e] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Invalid rotations fcadd v0.2s, v1.2s, v2.2s, #1 @@ -93,17 +92,17 @@ fcmla v0.4h, v1.4h, v2.h[0], #0 // FP16: fcmla v0.4h, v1.4h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x2f] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.8h, v1.8h, v2.h[0], #0 // FP16: fcmla v0.8h, v1.8h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x6f] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2s, v1.2s, v2.s[0], #0 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction // NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction fcmla v0.4s, v1.4s, v2.s[0], #0 // CHECK: fcmla v0.4s, v1.4s, v2.s[0], #0 // encoding: [0x20,0x10,0x82,0x6f] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.2d, v1.2d, v2.d[0], #0 // STDERR: 
:[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction // NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction @@ -111,26 +110,26 @@ // Rotations fcmla v0.4s, v1.4s, v2.s[0], #90 // CHECK: fcmla v0.4s, v1.4s, v2.s[0], #90 // encoding: [0x20,0x30,0x82,0x6f] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.4s, v1.4s, v2.s[0], #180 // CHECK: fcmla v0.4s, v1.4s, v2.s[0], #180 // encoding: [0x20,0x50,0x82,0x6f] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.4s, v1.4s, v2.s[0], #270 // CHECK: fcmla v0.4s, v1.4s, v2.s[0], #270 // encoding: [0x20,0x70,0x82,0x6f] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Valid indices fcmla v0.4h, v1.4h, v2.h[1], #0 // FP16: fcmla v0.4h, v1.4h, v2.h[1], #0 // encoding: [0x20,0x10,0x62,0x2f] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.8h, v1.8h, v2.h[3], #0 // FP16: fcmla v0.8h, v1.8h, v2.h[3], #0 // encoding: [0x20,0x18,0x62,0x6f] // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16 -// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a fcmla v0.4s, v1.4s, v2.s[1], #0 // CHECK: fcmla v0.4s, v1.4s, v2.s[1], #0 // encoding: [0x20,0x18,0x82,0x6f] -// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum +// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a // Invalid indices fcmla v0.4h, v1.4h, v2.h[2], #0 Index: 
test/MC/AArch64/armv8.3a-js.s =================================================================== --- test/MC/AArch64/armv8.3a-js.s +++ test/MC/AArch64/armv8.3a-js.s @@ -1,20 +1,10 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a -o - %s 2>&1 | \ -// RUN: FileCheck %s - -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+jsconv -o - %s 2>&1 | \ -// RUN: FileCheck %s - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu %s 2>&1 | \ -// RUN: FileCheck --check-prefix=CHECK-JS %s - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+jsconv,-fp-armv8 -o - %s 2>&1 |\ -// RUN: FileCheck --check-prefix=CHECK-REQ %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-REQ < %t %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,-fp-armv8 < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NOFP < %t %s fjcvtzs w0, d0 // CHECK: fjcvtzs w0, d0 // encoding: [0x00,0x00,0x7e,0x1e] - -// CHECK-JS: error: instruction requires: jsconv - -// NOJS: error: instruction requires: jsconv - -// CHECK-REQ: error: instruction requires: fp-armv8 jsconv +// CHECK-REQ: error: instruction requires: armv8.3a +// CHECK-NOFP: error: instruction requires: fp-armv8 Index: test/MC/AArch64/armv8.3a-signed-pointer.s =================================================================== --- test/MC/AArch64/armv8.3a-signed-pointer.s +++ test/MC/AArch64/armv8.3a-signed-pointer.s @@ -1,11 +1,7 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a -o - %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,ALL %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a < %s 2> %t | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-REQ %s < %t -// RUN: not llvm-mc -triple 
aarch64-none-linux-gnu %s -o - > %t.1 2>%t.2 -// RUN: FileCheck --check-prefixes=ALL,NOENC %s < %t.1 -// RUN: FileCheck --check-prefix=CHECK-REQ %s < %t.2 - -// ALL: .text mrs x0, apiakeylo_el1 mrs x0, apiakeyhi_el1 mrs x0, apibkeylo_el1 @@ -16,39 +12,28 @@ mrs x0, apdbkeyhi_el1 mrs x0, apgakeylo_el1 mrs x0, apgakeyhi_el1 -// ALL-EMPTY: -// ALL-EMPTY: -// CHECK-NEXT: mrs x0, APIAKeyLo_EL1 // encoding: [0x00,0x21,0x38,0xd5] -// CHECK-NEXT: mrs x0, APIAKeyHi_EL1 // encoding: [0x20,0x21,0x38,0xd5] -// CHECK-NEXT: mrs x0, APIBKeyLo_EL1 // encoding: [0x40,0x21,0x38,0xd5] -// CHECK-NEXT: mrs x0, APIBKeyHi_EL1 // encoding: [0x60,0x21,0x38,0xd5] -// CHECK-NEXT: mrs x0, APDAKeyLo_EL1 // encoding: [0x00,0x22,0x38,0xd5] -// CHECK-NEXT: mrs x0, APDAKeyHi_EL1 // encoding: [0x20,0x22,0x38,0xd5] -// CHECK-NEXT: mrs x0, APDBKeyLo_EL1 // encoding: [0x40,0x22,0x38,0xd5] -// CHECK-NEXT: mrs x0, APDBKeyHi_EL1 // encoding: [0x60,0x22,0x38,0xd5] -// CHECK-NEXT: mrs x0, APGAKeyLo_EL1 // encoding: [0x00,0x23,0x38,0xd5] -// CHECK-NEXT: mrs x0, APGAKeyHi_EL1 // encoding: [0x20,0x23,0x38,0xd5] + +// CHECK: mrs x0, APIAKeyLo_EL1 // encoding: [0x00,0x21,0x38,0xd5] +// CHECK: mrs x0, APIAKeyHi_EL1 // encoding: [0x20,0x21,0x38,0xd5] +// CHECK: mrs x0, APIBKeyLo_EL1 // encoding: [0x40,0x21,0x38,0xd5] +// CHECK: mrs x0, APIBKeyHi_EL1 // encoding: [0x60,0x21,0x38,0xd5] +// CHECK: mrs x0, APDAKeyLo_EL1 // encoding: [0x00,0x22,0x38,0xd5] +// CHECK: mrs x0, APDAKeyHi_EL1 // encoding: [0x20,0x22,0x38,0xd5] +// CHECK: mrs x0, APDBKeyLo_EL1 // encoding: [0x40,0x22,0x38,0xd5] +// CHECK: mrs x0, APDBKeyHi_EL1 // encoding: [0x60,0x22,0x38,0xd5] +// CHECK: mrs x0, APGAKeyLo_EL1 // encoding: [0x00,0x23,0x38,0xd5] +// CHECK: mrs x0, APGAKeyHi_EL1 // encoding: [0x20,0x23,0x38,0xd5] // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apiakeylo_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apiakeyhi_el1 // CHECK-REQ: error: expected readable 
system register -// CHECK-REQ-NEXT: mrs x0, apibkeylo_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apibkeyhi_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apdakeylo_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apdakeyhi_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apdbkeylo_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apdbkeyhi_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apgakeylo_el1 // CHECK-REQ: error: expected readable system register -// CHECK-REQ-NEXT: mrs x0, apgakeyhi_el1 msr apiakeylo_el1, x0 msr apiakeyhi_el1, x0 @@ -60,249 +45,191 @@ msr apdbkeyhi_el1, x0 msr apgakeylo_el1, x0 msr apgakeyhi_el1, x0 -// ALL-EMPTY: -// ALL-EMPTY: -// CHECK-NEXT: msr APIAKeyLo_EL1, x0 // encoding: [0x00,0x21,0x18,0xd5] -// CHECK-NEXT: msr APIAKeyHi_EL1, x0 // encoding: [0x20,0x21,0x18,0xd5] -// CHECK-NEXT: msr APIBKeyLo_EL1, x0 // encoding: [0x40,0x21,0x18,0xd5] -// CHECK-NEXT: msr APIBKeyHi_EL1, x0 // encoding: [0x60,0x21,0x18,0xd5] -// CHECK-NEXT: msr APDAKeyLo_EL1, x0 // encoding: [0x00,0x22,0x18,0xd5] -// CHECK-NEXT: msr APDAKeyHi_EL1, x0 // encoding: [0x20,0x22,0x18,0xd5] -// CHECK-NEXT: msr APDBKeyLo_EL1, x0 // encoding: [0x40,0x22,0x18,0xd5] -// CHECK-NEXT: msr APDBKeyHi_EL1, x0 // encoding: [0x60,0x22,0x18,0xd5] -// CHECK-NEXT: msr APGAKeyLo_EL1, x0 // encoding: [0x00,0x23,0x18,0xd5] -// CHECK-NEXT: msr APGAKeyHi_EL1, x0 // encoding: [0x20,0x23,0x18,0xd5] + +// CHECK: msr APIAKeyLo_EL1, x0 // encoding: [0x00,0x21,0x18,0xd5] +// CHECK: msr APIAKeyHi_EL1, x0 // encoding: [0x20,0x21,0x18,0xd5] +// CHECK: msr APIBKeyLo_EL1, x0 // encoding: [0x40,0x21,0x18,0xd5] +// CHECK: msr APIBKeyHi_EL1, x0 // encoding: [0x60,0x21,0x18,0xd5] +// CHECK: msr APDAKeyLo_EL1, x0 // encoding: [0x00,0x22,0x18,0xd5] +// CHECK: msr 
APDAKeyHi_EL1, x0 // encoding: [0x20,0x22,0x18,0xd5] +// CHECK: msr APDBKeyLo_EL1, x0 // encoding: [0x40,0x22,0x18,0xd5] +// CHECK: msr APDBKeyHi_EL1, x0 // encoding: [0x60,0x22,0x18,0xd5] +// CHECK: msr APGAKeyLo_EL1, x0 // encoding: [0x00,0x23,0x18,0xd5] +// CHECK: msr APGAKeyHi_EL1, x0 // encoding: [0x20,0x23,0x18,0xd5] // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apiakeylo_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apiakeyhi_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apibkeylo_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apibkeyhi_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apdakeylo_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apdakeyhi_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apdbkeylo_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apdbkeyhi_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apgakeylo_el1, x0 // CHECK-REQ: error: expected writable system register or pstate -// CHECK-REQ-NEXT: msr apgakeyhi_el1, x0 -// ALL-EMPTY: -// ALL-EMPTY: paciasp -// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] -// NOENC-NEXT: paciasp +// CHECK: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a autiasp -// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] -// NOENC-NEXT: autiasp +// CHECK: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a paciaz -// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] -// NOENC-NEXT: paciaz +// CHECK: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: 
instruction requires: armv8.3a autiaz -// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] -// NOENC-NEXT: autiaz +// CHECK: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a pacia1716 -// CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] -// NOENC-NEXT: pacia1716 +// CHECK: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a autia1716 -// CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] -// NOENC-NEXT: autia1716 +// CHECK: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a pacibsp -// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] -// NOENC-NEXT: pacibsp +// CHECK: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a autibsp -// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] -// NOENC-NEXT: autibsp +// CHECK: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a pacibz -// CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] -// NOENC-NEXT: pacibz +// CHECK: pacibz // encoding: [0x5f,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a autibz -// CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] -// NOENC-NEXT: autibz +// CHECK: autibz // encoding: [0xdf,0x23,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a pacib1716 -// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] -// NOENC-NEXT: pacib1716 +// CHECK: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a autib1716 -// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] -// NOENC-NEXT: autib1716 +// CHECK: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a xpaclri -// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] -// NOENC-NEXT: xpaclri +// CHECK: xpaclri // encoding: 
[0xff,0x20,0x03,0xd5] +// CHECK-REQ-NOT: error: instruction requires: armv8.3a -// ALL-EMPTY: pacia x0, x1 -// CHECK-NEXT: pacia x0, x1 // encoding: [0x20,0x00,0xc1,0xda] -// CHECK-REQ-NEXT: ^ -// CHECK-REQ-NEXT: error: instruction requires: pa -// CHECK-REQ-NEXT: pacia x0, x1 +// CHECK: pacia x0, x1 // encoding: [0x20,0x00,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autia x0, x1 -// CHECK-NEXT: autia x0, x1 // encoding: [0x20,0x10,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autia x0, x1 +// CHECK: autia x0, x1 // encoding: [0x20,0x10,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacda x0, x1 -// CHECK-NEXT: pacda x0, x1 // encoding: [0x20,0x08,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacda x0, x1 +// CHECK: pacda x0, x1 // encoding: [0x20,0x08,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autda x0, x1 -// CHECK-NEXT: autda x0, x1 // encoding: [0x20,0x18,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autda x0, x1 +// CHECK: autda x0, x1 // encoding: [0x20,0x18,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacib x0, x1 -// CHECK-NEXT: pacib x0, x1 // encoding: [0x20,0x04,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacib x0, x1 +// CHECK: pacib x0, x1 // encoding: [0x20,0x04,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autib x0, x1 -// CHECK-NEXT: autib x0, x1 // encoding: [0x20,0x14,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autib x0, x1 +// CHECK: autib x0, x1 // encoding: [0x20,0x14,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacdb x0, x1 -// CHECK-NEXT: pacdb x0, x1 // encoding: [0x20,0x0c,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacdb x0, x1 +// CHECK: pacdb x0, x1 // encoding: [0x20,0x0c,0xc1,0xda] +// CHECK-REQ: error: instruction requires: 
armv8.3a autdb x0, x1 -// CHECK-NEXT: autdb x0, x1 // encoding: [0x20,0x1c,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autdb x0, x1 +// CHECK: autdb x0, x1 // encoding: [0x20,0x1c,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacga x0, x1, x2 -// CHECK-NEXT: pacga x0, x1, x2 // encoding: [0x20,0x30,0xc2,0x9a] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacga x0, x1, x2 +// CHECK: pacga x0, x1, x2 // encoding: [0x20,0x30,0xc2,0x9a] +// CHECK-REQ: error: instruction requires: armv8.3a paciza x0 -// CHECK-NEXT: paciza x0 // encoding: [0xe0,0x23,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: paciza x0 +// CHECK: paciza x0 // encoding: [0xe0,0x23,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autiza x0 -// CHECK-NEXT: autiza x0 // encoding: [0xe0,0x33,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autiza x0 +// CHECK: autiza x0 // encoding: [0xe0,0x33,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacdza x0 -// CHECK-NEXT: pacdza x0 // encoding: [0xe0,0x2b,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacdza x0 +// CHECK: pacdza x0 // encoding: [0xe0,0x2b,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autdza x0 -// CHECK-NEXT: autdza x0 // encoding: [0xe0,0x3b,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autdza x0 +// CHECK: autdza x0 // encoding: [0xe0,0x3b,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacizb x0 -// CHECK-NEXT: pacizb x0 // encoding: [0xe0,0x27,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacizb x0 +// CHECK: pacizb x0 // encoding: [0xe0,0x27,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autizb x0 -// CHECK-NEXT: autizb x0 // encoding: [0xe0,0x37,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autizb x0 +// 
CHECK: autizb x0 // encoding: [0xe0,0x37,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a pacdzb x0 -// CHECK-NEXT: pacdzb x0 // encoding: [0xe0,0x2f,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: pacdzb x0 +// CHECK: pacdzb x0 // encoding: [0xe0,0x2f,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a autdzb x0 -// CHECK-NEXT: autdzb x0 // encoding: [0xe0,0x3f,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: autdzb x0 +// CHECK: autdzb x0 // encoding: [0xe0,0x3f,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a xpaci x0 -// CHECK-NEXT: xpaci x0 // encoding: [0xe0,0x43,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: xpaci x0 +// CHECK: xpaci x0 // encoding: [0xe0,0x43,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a xpacd x0 -// CHECK-NEXT: xpacd x0 // encoding: [0xe0,0x47,0xc1,0xda] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: xpacd x0 +// CHECK: xpacd x0 // encoding: [0xe0,0x47,0xc1,0xda] +// CHECK-REQ: error: instruction requires: armv8.3a braa x0, x1 -// CHECK-EMPTY: -// CHECK-NEXT: braa x0, x1 // encoding: [0x01,0x08,0x1f,0xd7] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: braa x0, x1 +// CHECK: braa x0, x1 // encoding: [0x01,0x08,0x1f,0xd7] +// CHECK-REQ: error: instruction requires: armv8.3a brab x0, x1 -// CHECK-NEXT: brab x0, x1 // encoding: [0x01,0x0c,0x1f,0xd7] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: brab x0, x1 +// CHECK: brab x0, x1 // encoding: [0x01,0x0c,0x1f,0xd7] +// CHECK-REQ: error: instruction requires: armv8.3a blraa x0, x1 -// CHECK-NEXT: blraa x0, x1 // encoding: [0x01,0x08,0x3f,0xd7] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: blraa x0, x1 +// CHECK: blraa x0, x1 // encoding: [0x01,0x08,0x3f,0xd7] +// CHECK-REQ: error: instruction requires: armv8.3a blrab x0, x1 -// CHECK-NEXT: blrab x0, x1 // encoding: 
[0x01,0x0c,0x3f,0xd7] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: blrab x0, x1 +// CHECK: blrab x0, x1 // encoding: [0x01,0x0c,0x3f,0xd7] +// CHECK-REQ: error: instruction requires: armv8.3a braaz x0 -// CHECK-EMPTY: -// CHECK-NEXT: braaz x0 // encoding: [0x1f,0x08,0x1f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: braaz x0 +// CHECK: braaz x0 // encoding: [0x1f,0x08,0x1f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a brabz x0 -// CHECK-NEXT: brabz x0 // encoding: [0x1f,0x0c,0x1f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: brabz x0 +// CHECK: brabz x0 // encoding: [0x1f,0x0c,0x1f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a blraaz x0 -// CHECK-NEXT: blraaz x0 // encoding: [0x1f,0x08,0x3f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: blraaz x0 +// CHECK: blraaz x0 // encoding: [0x1f,0x08,0x3f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a blrabz x0 -// CHECK-NEXT: blrabz x0 // encoding: [0x1f,0x0c,0x3f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: blrabz x0 +// CHECK: blrabz x0 // encoding: [0x1f,0x0c,0x3f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a retaa -// CHECK-NEXT: retaa // encoding: [0xff,0x0b,0x5f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: retaa +// CHECK: retaa // encoding: [0xff,0x0b,0x5f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a retab -// CHECK-NEXT: retab // encoding: [0xff,0x0f,0x5f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: retab +// CHECK: retab // encoding: [0xff,0x0f,0x5f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a eretaa -// CHECK-NEXT: eretaa // encoding: [0xff,0x0b,0x9f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: eretaa +// CHECK: eretaa // encoding: [0xff,0x0b,0x9f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a eretab -// 
CHECK-NEXT: eretab // encoding: [0xff,0x0f,0x9f,0xd6] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: eretab +// CHECK: eretab // encoding: [0xff,0x0f,0x9f,0xd6] +// CHECK-REQ: error: instruction requires: armv8.3a ldraa x0, [x1, 4088] -// CHECK-NEXT: ldraa x0, [x1, #4088] // encoding: [0x20,0xf4,0x3f,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldraa x0, [x1, 4088] +// CHECK: ldraa x0, [x1, #4088] // encoding: [0x20,0xf4,0x3f,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldraa x0, [x1, -4096] -// CHECK-NEXT: ldraa x0, [x1, #-4096] // encoding: [0x20,0x04,0x60,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldraa x0, [x1, -4096] +// CHECK: ldraa x0, [x1, #-4096] // encoding: [0x20,0x04,0x60,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldrab x0, [x1, 4088] -// CHECK-NEXT: ldrab x0, [x1, #4088] // encoding: [0x20,0xf4,0xbf,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldrab x0, [x1, 4088] +// CHECK: ldrab x0, [x1, #4088] // encoding: [0x20,0xf4,0xbf,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldrab x0, [x1, -4096] -// CHECK-NEXT: ldrab x0, [x1, #-4096] // encoding: [0x20,0x04,0xe0,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldrab x0, [x1, -4096] +// CHECK: ldrab x0, [x1, #-4096] // encoding: [0x20,0x04,0xe0,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldraa x0, [x1, 4088]! -// CHECK-NEXT: ldraa x0, [x1, #4088]! // encoding: [0x20,0xfc,0x3f,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldraa x0, [x1, 4088]! +// CHECK: ldraa x0, [x1, #4088]! // encoding: [0x20,0xfc,0x3f,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldraa x0, [x1, -4096]! -// CHECK-NEXT: ldraa x0, [x1, #-4096]! // encoding: [0x20,0x0c,0x60,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldraa x0, [x1, -4096]! +// CHECK: ldraa x0, [x1, #-4096]! 
// encoding: [0x20,0x0c,0x60,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldrab x0, [x1, 4088]! -// CHECK-NEXT: ldrab x0, [x1, #4088]! // encoding: [0x20,0xfc,0xbf,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldrab x0, [x1, 4088]! +// CHECK: ldrab x0, [x1, #4088]! // encoding: [0x20,0xfc,0xbf,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldrab x0, [x1, -4096]! -// CHECK-NEXT: ldrab x0, [x1, #-4096]! // encoding: [0x20,0x0c,0xe0,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldrab x0, [x1, -4096]! +// CHECK: ldrab x0, [x1, #-4096]! // encoding: [0x20,0x0c,0xe0,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldraa x0, [x1] -// CHECK-NEXT: ldraa x0, [x1] // encoding: [0x20,0x04,0x20,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldraa x0, [x1] +// CHECK: ldraa x0, [x1] // encoding: [0x20,0x04,0x20,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a ldrab x0, [x1] -// CHECK-NEXT: ldrab x0, [x1] // encoding: [0x20,0x04,0xa0,0xf8] -// CHECK-REQ: error: instruction requires: pa -// CHECK-REQ-NEXT: ldrab x0, [x1] +// CHECK: ldrab x0, [x1] // encoding: [0x20,0x04,0xa0,0xf8] +// CHECK-REQ: error: instruction requires: armv8.3a Index: test/MC/AArch64/armv8.4a-flag.s =================================================================== --- test/MC/AArch64/armv8.4a-flag.s +++ test/MC/AArch64/armv8.4a-flag.s @@ -1,14 +1,5 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a %s -o - | \ -// RUN: FileCheck %s - -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fmi %s -o - 2>&1 | \ -// RUN: FileCheck %s - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a %s -o - 2>&1 | \ -// RUN: FileCheck %s --check-prefix=ERROR - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-fmi %s -o - 2>&1 | \ -// RUN: FileCheck %s --check-prefix=ERROR +// RUN: llvm-mc -triple 
aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s | FileCheck %s --check-prefix=CHECK +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR //------------------------------------------------------------------------------ // Armv8.4-A flag manipulation instructions @@ -30,24 +21,24 @@ //CHECK-NEXT: rmif x1, #63, #15 // encoding: [0x2f,0x84,0x1f,0xba] //CHECK-NEXT: rmif xzr, #63, #15 // encoding: [0xef,0x87,0x1f,0xba] -//ERROR: error: instruction requires: fmi -//ERROR-NEXT: cfinv -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: setf8 w1 -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: setf8 wzr -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: setf16 w1 -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: setf16 wzr -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: rmif x1, #63, #15 -//ERROR-NEXT: ^ -//ERROR-NEXT: error: instruction requires: fmi -//ERROR-NEXT: rmif xzr, #63, #15 -//ERROR-NEXT: ^ +//CHECK-ERROR: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: cfinv +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: setf8 w1 +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: setf8 wzr +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: setf16 w1 +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: setf16 wzr +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: rmif x1, #63, #15 +//CHECK-ERROR-NEXT: ^ +//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a +//CHECK-ERROR-NEXT: rmif xzr, #63, #15 +//CHECK-ERROR-NEXT: ^ Index: test/MC/AArch64/armv8.4a-ldst.s 
=================================================================== --- test/MC/AArch64/armv8.4a-ldst.s +++ test/MC/AArch64/armv8.4a-ldst.s @@ -1,8 +1,5 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a -o - %s | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a,+rcpc-immo -o - %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-rcpc-immo -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s | FileCheck %s --check-prefix=CHECK +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 //------------------------------------------------------------------------------ // Armv8.4-A LDAPR and STLR instructions with immediate offsets @@ -143,168 +140,168 @@ //CHECK-NEXT: ldapur x13, [x4, #255] // encoding: [0x8d,0xf0,0x4f,0xd9] //CHECK-NEXT: ldapur x14, [sp, #9] // encoding: [0xee,0x93,0x40,0xd9] -//CHECK-NO-V84: error: instruction requires: rcpc-immo +//CHECK-NO-V84: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURB WZR, [X10] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURB W1, [X10] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURB W1, [X10, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: 
armv8.4a //CHECK-NO-V84-NEXT: stlurb w2, [x11, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURB W3, [SP, #-3] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapurb wzr, [x12] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapurb w4, [x12] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapurb w4, [x12, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURB W5, [X13, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURB W6, [SP, #-2] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSB W7, [X14] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSB W7, [X14, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursb w8, [x15, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursb w9, [sp, #-1] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: 
rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSB X0, [X16] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSB X0, [X16, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSB X1, [X17, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursb x2, [sp, #0] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursb x2, [sp] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: stlurh w10, [x18] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: stlurh w10, [x18, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURH W11, [X19, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLURH W12, [SP, #1] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURH W13, [X20] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURH W13, [X20, #-256] 
//CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapurh w14, [x21, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURH W15, [SP, #2] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSH W16, [X22] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSH W16, [X22, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSH W17, [X23, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursh w18, [sp, #3] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursh x3, [x24] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursh x3, [x24, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSH X4, [X25, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSH X5, [SP, #4] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: 
instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR W19, [X26] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR W19, [X26, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: stlur w20, [x27, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR W21, [SP, #5] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR W22, [X28] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR W22, [X28, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR W23, [X29, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapur w24, [sp, #6] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursw x6, [x30] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapursw x6, [x30, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSW X7, [X0, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: 
instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPURSW X8, [SP, #7] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR X9, [X1] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR X9, [X1, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: stlur x10, [x2, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: STLUR X11, [SP, #8] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR X12, [X3] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR X12, [X3, #-256] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: LDAPUR X13, [X4, #255] //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo +//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a //CHECK-NO-V84-NEXT: ldapur x14, [sp, #9] //CHECK-NO-V84-NEXT: ^ Index: test/MC/AArch64/armv8.4a-tlb.s =================================================================== --- test/MC/AArch64/armv8.4a-tlb.s +++ test/MC/AArch64/armv8.4a-tlb.s @@ -1,9 +1,6 @@ // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s 2> %t | FileCheck %s --check-prefix=CHECK // RUN: FileCheck 
--check-prefix=CHECK-ERROR < %t %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+tlb-rmi < %s 2> %t | FileCheck %s --check-prefix=CHECK -// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-tlb-rmi < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84 // Outer shareable TLB maintenance instructions: tlbi vmalle1os @@ -48,55 +45,55 @@ //CHECK-ERROR-NEXT: tlbi vae1os, sp //CHECK-ERROR-NEXT: ^ -//CHECK-NO-V84: error: TLBI VMALLE1OS requires tlb-rmi +//CHECK-NO-V84: error: TLBI VMALLE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vmalle1os //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vae1os, xzr //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vae1os, x0 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI ASIDE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI ASIDE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi aside1os, x1 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAAE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAAE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vaae1os, x2 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VALE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VALE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vale1os, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAALE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAALE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vaale1os, x4 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI IPAS2E1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI IPAS2E1OS 
requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ipas2e1os, x5 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI IPAS2LE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI IPAS2LE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ipas2le1os, x6 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAE2OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAE2OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vae2os, x7 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VALE2OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VALE2OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vale2os, x8 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VMALLS12E1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VMALLS12E1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vmalls12e1os //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VAE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VAE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vae3os, x9 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI VALE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI VALE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi vale3os, x10 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI ALLE2OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI ALLE2OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi alle2os //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI ALLE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI ALLE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi alle1os //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI ALLE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI ALLE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi alle3os //CHECK-NO-V84-NEXT: ^ @@ -171,96 +168,96 @@ //CHECK-ERROR-NEXT: tlbi rvae1, sp //CHECK-ERROR-NEXT: ^ -//CHECK-NO-V84: error: TLBI RVAE1 requires tlb-rmi +//CHECK-NO-V84: error: TLBI RVAE1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae1, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAAE1 requires 
tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAAE1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaae1, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE1 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale1, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAALE1 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAALE1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaale1, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE1IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAAE1IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAAE1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaae1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE1IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAALE1IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAALE1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaale1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae1os, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAAE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAAE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaae1os, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale1os, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAALE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAALE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvaale1os, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1IS 
requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2e1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2le1is, x3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2e1, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2le1, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2e1os, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi ripas2le1os, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE2 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE2 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae2, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE2 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE2 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale2, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE2IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE2IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae2is, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE2IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE2IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale2is, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE2OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE2OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae2os, X3 //CHECK-NO-V84-NEXT: ^ 
-//CHECK-NO-V84-NEXT: error: TLBI RVALE2OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE2OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale2os, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE3 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE3 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae3, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE3 requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE3 requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale3, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE3IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE3IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae3is, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE3IS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE3IS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale3is, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVAE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVAE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvae3os, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale3os, X3 //CHECK-NO-V84-NEXT: ^ -//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires tlb-rmi +//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires ARMv8.4a //CHECK-NO-V84-NEXT: tlbi rvale3os, XZR //CHECK-NO-V84-NEXT: ^ Index: test/MC/AArch64/armv8.4a-trace.s =================================================================== --- test/MC/AArch64/armv8.4a-trace.s +++ test/MC/AArch64/armv8.4a-trace.s @@ -1,14 +1,5 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a -o - 2>&1 %s | \ -// RUN: FileCheck %s - -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+tracev8.4 -o - 2>&1 %s | \ -// RUN: FileCheck %s - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a -o - %s 2>&1 | \ -// RUN: FileCheck 
%s --check-prefix=CHECK-ERROR - -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-tracev8.4 -o - %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes NOFEATURE,CHECK-ERROR +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s | FileCheck %s --check-prefix=CHECK +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR //------------------------------------------------------------------------------ // ARMV8.4-A Debug, Trace and PMU Extensions @@ -54,4 +45,4 @@ //CHECK-ERROR: mrs x0, TRFCR_EL12 //CHECK-ERROR: ^ -//CHECK-ERROR: error: instruction requires: tracev8.4 +//CHECK-ERROR: error: instruction requires: armv8.4a Index: test/Transforms/ExpandMemCmp/X86/memcmp.ll =================================================================== --- test/Transforms/ExpandMemCmp/X86/memcmp.ll +++ test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -130,11 +130,11 @@ ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] ; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16* -; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16* -; ALL-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2 -; ALL-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2 -; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] +; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16* +; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16* +; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]] ; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]] ; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) ; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) @@ -178,11 +178,11 @@ ; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], 
[[TMP8]] ; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] +; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32* +; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32* +; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]] ; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]] ; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) ; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) @@ -272,11 +272,11 @@ ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4 -; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 4 -; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16* +; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16* +; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]] ; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]] ; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) ; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) @@ -324,11 +324,11 @@ ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: 
loadbb1: -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2 -; X64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2 -; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32* +; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32* +; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]] ; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]] ; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) ; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) @@ -394,11 +394,11 @@ ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i64* -; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i64* -; X64-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1 -; X64-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[TMP11]], i64 1 -; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64* +; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64* +; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]] ; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] ; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) ; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) @@ -597,11 +597,11 @@ ; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: 
[[TMP7:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2 -; X32-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2 -; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] +; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16* +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16* +; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]] ; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] ; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 ; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 @@ -625,11 +625,11 @@ ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* -; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* -; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2 -; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]] ; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] @@ -645,11 +645,11 @@ ; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* -; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* -; 
X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2 -; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]] ; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 @@ -668,11 +668,71 @@ } define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp_eq7( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; ALL-NEXT: ret i32 [[CONV]] +; X32-LABEL: @cmp_eq7( +; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] +; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3 +; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3 +; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] +; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] +; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] +; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] +; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; 
X32-NEXT: ret i32 [[CONV]] +; +; X64_1LD-LABEL: @cmp_eq7( +; X64_1LD-NEXT: br label [[LOADBB:%.*]] +; X64_1LD: res_block: +; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] +; X64_1LD: loadbb: +; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD: loadbb1: +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] +; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD: endblock: +; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_1LD-NEXT: ret i32 [[CONV]] +; +; X64_2LD-LABEL: @cmp_eq7( +; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] +; X64_2LD-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[TMP9]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] +; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_2LD-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) %cmp = icmp eq i32 %call, 0 @@ -687,11 +747,11 @@ ; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1 -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] +; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4 +; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] ; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] @@ -794,11 +854,11 @@ ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* -; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* -; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4 -; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 
8 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]] ; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] @@ -814,11 +874,11 @@ ; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* -; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* -; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4 -; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]] ; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64 @@ -837,11 +897,57 @@ } define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp_eq11( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; ALL-NEXT: ret i32 [[CONV]] +; X32-LABEL: @cmp_eq11( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; 
X32-NEXT: ret i32 [[CONV]] +; +; X64_1LD-LABEL: @cmp_eq11( +; X64_1LD-NEXT: br label [[LOADBB:%.*]] +; X64_1LD: res_block: +; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] +; X64_1LD: loadbb: +; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD: loadbb1: +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD: endblock: +; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_1LD-NEXT: ret i32 [[CONV]] +; +; X64_2LD-LABEL: @cmp_eq11( +; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; 
X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] +; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 +; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_2LD-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) %cmp = icmp eq i32 %call, 0 @@ -868,11 +974,11 @@ ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32* -; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32* -; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] ; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] @@ -888,11 +994,11 @@ ; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32* -; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32* -; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2 -; X64_2LD-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[TMP8]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]] ; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 @@ -911,11 +1017,57 @@ } define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp_eq13( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; ALL-NEXT: ret i32 [[CONV]] +; X32-LABEL: @cmp_eq13( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X32-NEXT: ret i32 [[CONV]] +; +; X64_1LD-LABEL: @cmp_eq13( +; X64_1LD-NEXT: br label [[LOADBB:%.*]] +; X64_1LD: res_block: +; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] +; X64_1LD: loadbb: +; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD: loadbb1: +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; 
X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD: endblock: +; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_1LD-NEXT: ret i32 [[CONV]] +; +; X64_2LD-LABEL: @cmp_eq13( +; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] +; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 +; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_2LD-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) %cmp = icmp eq i32 %call, 0 @@ -924,11 +1076,57 @@ } define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp_eq14( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; ALL-NEXT: ret i32 [[CONV]] +; X32-LABEL: @cmp_eq14( +; X32-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X32-NEXT: ret i32 [[CONV]] +; +; X64_1LD-LABEL: @cmp_eq14( +; X64_1LD-NEXT: br label [[LOADBB:%.*]] +; X64_1LD: res_block: +; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] +; X64_1LD: loadbb: +; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD: loadbb1: +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD: endblock: +; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_1LD-NEXT: ret i32 [[CONV]] +; +; X64_2LD-LABEL: @cmp_eq14( +; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_2LD-NEXT: 
[[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] +; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 +; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_2LD-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) %cmp = icmp eq i32 %call, 0 @@ -937,11 +1135,57 @@ } define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp_eq15( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; ALL-NEXT: ret i32 [[CONV]] +; X32-LABEL: @cmp_eq15( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X32-NEXT: ret i32 [[CONV]] +; +; X64_1LD-LABEL: @cmp_eq15( +; X64_1LD-NEXT: br label [[LOADBB:%.*]] +; X64_1LD: res_block: +; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] +; X64_1LD: loadbb: +; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD: loadbb1: +; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7 +; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_1LD-NEXT: [[TMP8:%.*]] 
= getelementptr i8, i8* [[Y]], i8 7 +; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD: endblock: +; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_1LD-NEXT: ret i32 [[CONV]] +; +; X64_2LD-LABEL: @cmp_eq15( +; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7 +; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* +; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7 +; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] +; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] +; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 +; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; X64_2LD-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) %cmp = icmp eq i32 %call, 0 Index: test/tools/llvm-dwarfdump/X86/eh-frame-return-address-reg.s =================================================================== --- /dev/null +++ test/tools/llvm-dwarfdump/X86/eh-frame-return-address-reg.s @@ -0,0 +1,51 @@ +# 
RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o +# RUN: llvm-dwarfdump -v %t.o | FileCheck %s + +# The .eh_frame section is similar in format and +# purpose to the .debug_frame section. +# Version 1 is often used for .eh_frame, +# and it was also used for DWARF v2. In that case, the +# return address register is encoded as a ubyte, +# while later versions use ULEB128. This test case +# checks that we are able to dump it correctly. + +# CHECK: .eh_frame contents: +# CHECK: 00000000 00000010 ffffffff CIE +# CHECK-NEXT: Version: 1 +# CHECK-NEXT: Augmentation: "zR" +# CHECK-NEXT: Code alignment factor: 1 +# CHECK-NEXT: Data alignment factor: 1 +# CHECK-NEXT: Return address column: 240 +# CHECK-NEXT: Augmentation data: 1A + +.text +.global _start +_start: + nop + +.section .eh_frame, "a" + .long 16 # Size + .long 0x00 # ID + .byte 0x01 # Version + + .byte 0x7A # Augmentation string: "zR" + .byte 0x52 + .byte 0x00 + + .byte 0x01 # Code alignment factor, ULEB128 + .byte 0x01 # Data alignment factor, ULEB128 + + .byte 0xF0 # Return address register, ubyte for version 1. + + .byte 0x01 # Augmentation data length, ULEB128 + .byte 0x1A # DW_EH_PE_pcrel | DW_EH_PE_sdata2 + + .byte 0x00 + .byte 0x00 + .byte 0x00 + + .long 10 # Size + .long 24 # CIE pointer +fde: + .long _start - fde + .word 0